Example #1
def entrypoint(argv):
    if len(argv) == 1:
        die('must supply a subcommand: "hpc", "integrate", "ljob", "preprays", "sde", "view", "vkl"')

    if argv[1] == 'hpc':
        from ..hpc import entrypoint
        entrypoint(['vernon ' + argv[1]] + argv[2:])
    elif argv[1] == 'integrate':
        from ..integrate import entrypoint
        entrypoint(['vernon ' + argv[1]] + argv[2:])
    elif argv[1] == 'ljob':
        farm_out_to_ljob(['vernon ' + argv[1]] + argv[2:])
    elif argv[1] == 'preprays':
        from ..preprays import entrypoint
        entrypoint(['vernon ' + argv[1]] + argv[2:])
    elif argv[1] == 'sde':
        from ..sde import entrypoint
        entrypoint(['vernon ' + argv[1]] + argv[2:])
    elif argv[1] == 'view':
        from .view import entrypoint
        entrypoint(['vernon ' + argv[1]] + argv[2:])
    elif argv[1] == 'vkl':
        from ..vkl import entrypoint
        entrypoint(['vernon ' + argv[1]] + argv[2:])
    else:
        die('unrecognized subcommand %r', argv[1])
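For context, a dispatcher like this is usually wired up as a console-script entry point that forwards sys.argv. A minimal sketch follows, assuming a hypothetical vernon.cli module layout and "vernon = vernon.cli:main" entry-point wiring that is not taken from the code above:

import sys

from vernon.cli import entrypoint  # hypothetical module path for the dispatcher shown above

def main():
    # Console-script entry points are called with no arguments, so pass
    # sys.argv along; the dispatcher expects argv[0] to be present.
    entrypoint(sys.argv)

if __name__ == '__main__':
    main()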
Example #2
def dump_task_log(jobdir, taskid):
    """Helper function for extracting the log for a given attempt from a queue-job
    work directory. Used by the 'ljob tasklog' command, so this is written
    in a completely non-library style. Will dump output from all relevant
    attempts.

    """
    taskid = six.text_type(taskid)
    attempts = []
    workerid = None

    with io.open(os.path.join(jobdir, 'attempts.log'), 'rt') as f:
        for line in f:
            pieces = line.split()
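            # Assumed layout of an "issued" record, implied by the indexing
            # below: field 1 is the event type, field 3 the worker ID,
            # field 4 the attempt ID, and field 5 the task ID.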

            if pieces[1] != 'issued':
                continue
            if pieces[5] == taskid:
                attempts.append(
                    (pieces[3], pieces[4]))  # worker ID, attempt ID

    if not len(attempts):
        die('cannot find any attempts for task %s', taskid)

    _dump_worker_attempt_log(jobdir, *attempts[0])

    for attinfo in attempts[1:]:
        print()
        _dump_worker_attempt_log(jobdir, *attinfo)
Example #3
    def connect(self):
        self.attlog = LogFile('bulk/%s/attempts.log' % self.jobid)
        self.stdiolog = io.open('bulk/%s/stdio.log' % self.jobid,
                                'wt',
                                buffering=1)

        dest = port = cookie = None
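        # Poll for the master's socket info: up to 6 attempts, 10 seconds
        # apart, giving the master about a minute to write the file.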

        for i in range(6):
            try:
                with io.open('jobinfo/sockinfo.txt', 'rt') as f:
                    dest = f.readline().strip()
                    port = int(f.readline())
                    cookie = f.readline().strip()
                break
            except Exception as e:
                warn('failed to read master socket info: %s', e)
            time.sleep(10)

        if cookie is None:
            die('took too long to find master; giving up')

        log('connecting to %s/%d', dest, port)
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect((dest, port))
        sock.setblocking(0)
        self.sock = sock
        self.msgreader = JsonMessageReader(sock)
        self.send(
            'hello', {
                'jobid': six.text_type(self.jobid),
                'cookie': six.text_type(cookie),
                'job_array_id': six.text_type(self.job_array_id),
            })
Example #4
def compute_cli(args):
    settings = make_compute_parser().parse_args(args=args)
    config = PrepraysConfiguration.from_toml(settings.config_path)

    imaker = config.get_prep_rays_imaker(settings.cml)

    ray_parameters = common_ray_parameters + imaker.setup.distrib._parameter_names
    n_vals = len(ray_parameters)
    data = np.zeros((n_vals, settings.n_cols_to_compute, config.max_n_samps))
    n_samps = np.zeros((settings.n_cols_to_compute,), dtype=np.int)
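    # data is indexed as (ray parameter, column within this chunk, sample
    # along the ray); n_samps records how many samples each column actually
    # used so the array can be trimmed before saving.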

    for i in range(settings.n_cols_to_compute):
        ray = imaker.get_ray(settings.start_col + i, settings.row_num)

        if ray.s.size > config.max_n_samps:
            die('too many samples required for ray at ix=%d iy=%d: max=%d, got=%d',
                settings.start_col + i, settings.row_num, config.max_n_samps, ray.s.size)

        n_samps[i] = ray.s.size
        sl = slice(0, ray.s.size)

        for j, pname in enumerate(ray_parameters):
            data[j,i,sl] = getattr(ray, pname)

    obs_max_n_samps = n_samps.max()
    data = data[:,:,:obs_max_n_samps]

    fn = 'archive/frame%04d_%04d_%04d.npy' % \
         (settings.frame_num, settings.row_num, settings.start_col)

    with io.open(fn, 'wb') as f:
        np.save(f, n_samps)
        np.save(f, data)
Example #5
def prep_and_image_ui(pre_args, settings, config):
    if not Path('Config.toml').exists():
        die('expected "Config.toml" in current directory')

    os.mkdir('preprays')
    os.mkdir('integrate')

    with open('preprays/tasks', 'wb') as tasks:
        subprocess.check_call(
            ['vernon', 'preprays', 'seed',
             '-c', 'Config.toml',
             '-g', str(config.preprays_n_col_groups)
            ],
            stdout = tasks,
        )

    masterid = config.launch_ljob('preprays', 'preprays')
    nextid = config.schedule_next_stage(
        'pandi_pr_assemble',
        pre_args + ['--stage=pr_assemble', '--previd=%s' % masterid],
        masterid,
    )

    print('Preprays ljob master ID:', masterid)
    print('Next-stage job ID:', nextid)

    with open('pandi_launch.log', 'wt') as log:
        print('Preprays ljob master ID:', masterid, file=log)
        print('Next-stage job ID:', nextid, file=log)
Example #6
def entrypoint(argv):
    if len(argv) == 1:
        die('must supply a subcommand: "prep-and-image"')

    if argv[1] == 'prep-and-image':
        prep_and_image_cli(argv) # note, full argv!
    else:
        die('unrecognized subcommand %r', argv[1])
Example #7
    def validate(self):
        p = Path(self.nn_path)
        if p != p.absolute():
            die('neural-net path must be absolute; got "%s"' % p)

        nn_cfg = p / 'nn_config.toml'
        if not nn_cfg.exists():
            die('bad setting for neural-net path: no such file %s', nn_cfg)
Example #8
def entrypoint(argv):
    if len(argv) == 1:
        die('must supply a subcommand: "gen-grid", "test-coeffs"')

    if argv[1] == 'gen-grid':
        gen_grid_cli(argv[2:])
    elif argv[1] == 'test-coeffs':
        test_coeffs_cli(argv[2:])
    else:
        die('unrecognized subcommand %r', argv[1])
Example #9
def farm_out_to_ljob(argv):
    "ljob is a shell script"
    import os
    from pwkit.io import Path

    if 'TOP' not in os.environ:
        die('ljob command must be run with the environment variable $TOP set to a directory')

    ljob_support_dir = (Path(__file__).parent.parent / 'ljob_support').resolve()
    os.environ['LJOB_SUPPORT'] = str(ljob_support_dir)
    ljob_script = ljob_support_dir / 'ljob.sh'
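    # execv() replaces the current Python process with the ljob shell
    # script, so this call does not return.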
    os.execv(str(ljob_script), argv)
Example #10
def commandline(argv=None):
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        die('usage: {ljob} launch')

    command = argv[1]

    if command == 'launch':
        do_launch(argv)
    else:
        die('unrecognized subcommand "%s"', command)
Example #11
def entrypoint(args):
    if not len(args):
        cli.die('must provide a subcommand: "cube", "hdf5"')

    subcommand = args[0]
    remaining_args = args[1:]

    if subcommand == 'cube':
        view_cube_cli(remaining_args)
    elif subcommand == 'hdf5':
        view_hdf5_cli(remaining_args)
    else:
        cli.die('unrecognized subcommand %r' % (subcommand, ))
Example #12
def nninit_cli(settings):
    nndir = Path(settings.nndir)

    try:
        nndir.mkdir()
    except OSError as e:
        if e.errno == 17:
            die('directory \"%s\" already exists' % settings.nndir)
        raise

    cfg_path = nndir / 'nn_config.toml'
    with cfg_path.open('wt') as f:
        pytoml.dump(f, NNINIT_DEFAULT_CONFIG)
Example #13
def view_cli(args):
    if len(args) == 0:
        die('must supply a sub-subcommand: "lc", "rot", "specmovie", "specseq"'
            )

    if args[0] == 'lc':
        view_lc_cli(args[1:])
    elif args[0] == 'rot':
        view_rot_cli(args[1:])
    elif args[0] == 'specmovie':
        view_specmovie_cli(args[1:])
    elif args[0] == 'specseq':
        view_specseq_cli(args[1:])
    else:
        die('unrecognized sub-subcommand %r', args[0])
Example #14
def entrypoint(argv):
    if len(argv) == 1:
        die('must supply a subcommand: "assemble", "gen-grid-config", "seed", "test-approx"')

    if argv[1] == 'assemble':
        assemble_cli(argv[2:])
    elif argv[1] == 'gen-grid-config':
        GriddedPrepraysConfiguration.generate_config_cli('preprays gen-grid-config', argv[2:])
    elif argv[1] == 'seed':
        seed_cli(argv[2:])
    elif argv[1] == '_compute':
        compute_cli(argv[2:])
    elif argv[1] == 'test-approx':
        test_approx_cli(argv[2:])
    else:
        die('unrecognized subcommand %r', argv[1])
Example #15
def entrypoint(argv):
    if len(argv) == 1:
        die('must supply a subcommand: "assemble", "framegrab", "movie", "seed", "view"'
            )

    if argv[1] == 'seed':
        seed_cli(argv[2:])
    elif argv[1] == '_integrate':
        integrate_cli(argv[2:])
    elif argv[1] == 'assemble':
        assemble_cli(argv[2:])
    elif argv[1] == 'view':
        view_cli(argv[2:])
    elif argv[1] == 'movie':
        movie_cli(argv[2:])
    elif argv[1] == 'framegrab':
        framegrab_cli(argv[2:])
    else:
        die('unrecognized subcommand %r', argv[1])
Example #16
def entrypoint(argv):
    if len(argv) == 1:
        die('must supply a subcommand: "average", "cube-to-particles", "forward", "gen-gg-config", "gen-grid"'
            )

    if argv[1] == 'average':
        average_cli(argv[2:])
    elif argv[1] == 'cube-to-particles':
        cube_to_particles_cli(argv[2:])
    elif argv[1] == 'forward':
        forward_cli(argv[2:])
    elif argv[1] == 'gen-gg-config':
        from .grid import GenGridTask
        GenGridTask.generate_config_cli('sde gen-gg-config', argv[2:])
    elif argv[1] == 'gen-grid':
        from .grid import gen_grid_cli
        gen_grid_cli(argv[2:])
    else:
        die('unrecognized subcommand %r', argv[1])
Example #17
def do_launch(argv):
    """We're a master or worker job, which we can distinguish from the
    environment variable LJOB_IS_MASTER. (We could avoid needing that
    environment variable, but it's easy and fine that way.) We take a unique
    identifier from the LJOB_PROC_ID variable (which allows us not to have to
    worry about what particular mechanism was used to launch us).

    """
    ismaster = os.environ.get('LJOB_IS_MASTER')
    if ismaster is None:
        die('framework failure: no environment variable LJOB_IS_MASTER')

    jobid = os.environ.get('LJOB_PROC_ID')
    if jobid is None:
        die('framework failure: no environment variable LJOB_PROC_ID')

    if ismaster == 'y':
        launch_master(jobid)
    else:
        launch_worker(jobid)
Example #18
def prep_and_image_cli(argv):
    # When we're sub-executed by the "vernon" CLI driver, our argv[0] becomes
    # "vernon hpc". We need to re-split that so that our Slurm scripts invoke
    # the right thing.

    pre_args = argv[:2]
    if ' ' in argv[0]:
        pre_args = pre_args[0].split() + pre_args[1:]

    settings = make_prep_and_image_parser().parse_args(args=argv[2:])
    config = HPCConfiguration.from_toml('Config.toml')

    if settings.stage is None:
        prep_and_image_ui(pre_args, settings, config)
    elif settings.stage == 'pr_assemble':
        prep_and_image_pr_assemble(pre_args, settings, config)
    elif settings.stage == 'integ_assemble':
        prep_and_image_integ_assemble(pre_args, settings, config)
    else:
        die('unknown prep-and-image stage %r', settings.stage)
Example #19
def ms_transpose (vpath, tpath, transpose_args):
    squash_time_gaps = transpose_args.pop ('squash_time_gaps', 'n')
    if squash_time_gaps == 'y':
        squash_time_gaps = True
    elif squash_time_gaps == 'n':
        squash_time_gaps = False
    else:
        die ('"squash_time_gaps" keyword must be either "y" or "n"')

    try:
        return _ms_transpose (vpath, tpath, transpose_args,
                              squash_time_gaps=squash_time_gaps)
    except:
        # The unlink can overwrite the exception info that
        # an argumentless "raise" would reraise.
        t, v, tb = sys.exc_info ()
        try:
            os.unlink (tpath)
        except:
            pass
        raise
Example #20
def grep_attempt_log(jobdir, regex_str, mode='grep'):
    """Helper function for grepping through attempt logs. Used by the 'ljob
    att(un)grep' commands, so this is written in a completely non-library
    style.

    """
    import re

    try:
        regex = re.compile(regex_str)
    except Exception as e:
        die('cannot compile regular expression %r: %s', regex_str, e)

    if mode == 'grep':
        func = _grep_worker_attempt_log
    elif mode == 'ungrep':
        func = _ungrep_worker_attempt_log
    else:
        die('internal bug, unknown mode %r', mode)

    for workerid in os.listdir(os.path.join(jobdir, 'bulkdata')):
        func(jobdir, workerid, regex)
Example #21
def _dump_worker_attempt_log(jobdir, workerid, attid):
    startofs = endofs = elapsed = failcode = None
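    # startofs/endofs are byte offsets into the worker's stdio.log bracketing
    # this attempt's output; elapsed and failcode come from the "complete"
    # record.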

    with io.open(os.path.join(jobdir, 'bulkdata', workerid, 'attempts.log'),
                 'rt') as f:
        for line in f:
            pieces = line.split()

            if pieces[3] != attid:
                continue

            if pieces[1] in ('starting', 'complete'):
                if pieces[1][0] == 's':
                    startofs = int(pieces[5])
                else:
                    endofs = int(pieces[5])
                    elapsed = int(pieces[6])
                    failcode = int(pieces[7])

                if startofs is not None and endofs is not None:
                    break

    if startofs is None or endofs is None:
        die('cannot find offset information in logs of worker %s', workerid)

    ntoread = endofs - startofs

    print('==> attempt %s worker=%s elapsed=%d failcode=%d <==' %
          (attid, workerid, elapsed, failcode))

    with io.open(os.path.join(jobdir, 'bulkdata', workerid, 'stdio.log'),
                 'rb') as f:
        f.seek(startofs, 0)

        while ntoread > 0:
            d = f.read(min(ntoread, 4096))
            sys.stdout.write(d)
            ntoread -= len(d)
Example #22
def load_data_and_train(datadir, nndir, result_name):
    from .impl import NSModel

    cfg_path = Path(nndir) / 'nn_config.toml'
    dr, rinfo = DomainRange.from_serialized(cfg_path,
                                            result_to_extract=result_name)

    if rinfo is None:
        die('no known result named %r', result_name)

    sd = dr.load_and_normalize(datadir)

    trainer_name = rinfo['trainer']
    trainer_func = globals().get(trainer_name + '_trainer')
    if trainer_func is None:
        die('unknown trainer function %r', trainer_name)

    print('Training with scheme \"%s\"' % trainer_name)
    m = NSModel()
    m.ns_setup(rinfo['_index'], sd)
    t0 = time.time()
    trainer_func(m)
    m.training_wall_clock = time.time() - t0
    return m
Example #23
def entrypoint(argv):
    ap = argparse.ArgumentParser(prog='neurosynchro')
    subparsers = ap.add_subparsers(dest='subcommand',
                                   metavar='<command>',
                                   help='The sub-command to invoke')

    make_nninit_parser(
        subparsers.add_parser(
            'init-nndir',
            help=
            'Initialize a directory to save the neural network training data'))

    make_ldr_parser(
        subparsers.add_parser(
            'lock-domain-range',
            help='Find the domain and range of the training set'))

    make_summarize_parser(
        subparsers.add_parser(
            'summarize',
            help='Print summary statistics about the training set'))

    from .grtrans import make_parser as make_grtrans_parser
    make_grtrans_parser(
        subparsers.add_parser('test-grtrans',
                              help='Do a test integration with grtrans'))

    make_transform_parser(
        subparsers.add_parser(
            'transform',
            help=
            'Transform the training set into Neurosynchro\'s internal parametrization',
            description=
            'Transform the training set into Neurosynchro\'s internal parametrization.',
            epilog=
            '''The training set can have arbitrary input parameters, but should have eight
output parameters named `j_I`, `j_Q`, `j_V`, `alpha_I`, `alpha_Q`, `alpha_V`,
`rho_Q`, `rho_V` -- these are the standard Stokes-basis radiative transfer
coefficients. The transformed training set will be printed to standard output,
so you almost surely want to redirect the output of this program to a file.''')
    )

    from .train import make_parser as make_train_parser
    make_train_parser(
        subparsers.add_parser('train',
                              help='Train one of the neural networks'))

    settings = ap.parse_args(argv[1:])

    if settings.subcommand is None:
        die('you must supply a subcommand; run with "--help" for help')

    if settings.subcommand == 'init-nndir':
        nninit_cli(settings)
    elif settings.subcommand == 'lock-domain-range':
        lock_domain_range_cli(settings)
    elif settings.subcommand == 'summarize':
        summarize_cli(settings)
    elif settings.subcommand == 'test-grtrans':
        from .grtrans import grtrans_cli
        grtrans_cli(settings)
    elif settings.subcommand == 'train':
        from .train import train_cli
        train_cli(settings)
    elif settings.subcommand == 'transform':
        transform_cli(settings)
    else:
        # argparse will error out if the user gives an unrecognized
        # subcommand, so if we get here it's an internal bug
        assert False, 'internal bug: forgot to handle subcommand!'
Example #24
def doit(driver, args):
    # Load up the driver code

    try:
        text = open(driver).read()
    except Exception as e:
        cli.die('cannot read driver file "%s": %s', driver, e)

    try:
        code = compile(text, driver, 'exec')
    except Exception as e:
        if 'OMEGAFIG_BACKTRACE' in os.environ:
            raise
        cli.die('cannot compile driver file "%s": %s', driver, e)

    ns = {'__file__': driver, '__name__': '__omegafig__'}

    try:
        exec(code, ns)
    except Exception as e:
        if 'OMEGAFIG_BACKTRACE' in os.environ:
            raise
        cli.die('cannot execute driver file "%s": %s', driver, e)

    pfunc = ns.get('plot')
    if pfunc is None:
        cli.die('driver file "%s" does not provide a function called "plot"',
                driver)
    if not callable(pfunc):
        cli.die(
            'driver file "%s" provides something called "plot", but it\'s '
            'not a function', driver)

    # Deal with args

    try:
        code = pfunc.__code__
    except AttributeError:
        code = pfunc.func_code

    nargs = code.co_argcount
    argnames = code.co_varnames

    keywords = []
    nonkeywords = []

    for arg in args:
        if '=' in arg:
            keywords.append(arg)
        else:
            nonkeywords.append(arg)

    if len(nonkeywords) != nargs:
        cli.die('expected %d non-keyword arguments to driver, but got %d',
                nargs, len(nonkeywords))

    config = Config()
    defaults = ns.get('figdefaults')

    if defaults is not None:
        for key in defaults:
            setattr(config, key, defaults[key])

    config.parse(keywords)

    # Set up omegaplot globals as appropriate

    if config.pango:
        try:
            import omega.pango_g3 as ompango
        except ImportError:
            import omega.pango_g2 as ompango

        fontparams = {}
        if config.pangofamily is not None:
            fontparams['family'] = config.pangofamily
        if config.pangosize is not None:
            fontparams['size'] = config.pangosize
        if len(fontparams):
            ompango.setFont(**fontparams)

    # Execute.

    p = pfunc(*nonkeywords)

    if config.out is None:
        p.show(style=config.omstyle)
    else:
        p.save(config.out,
               style=config.omstyle,
               dims=config.dims,
               margins=config.margin)
Example #25
def _mir_transpose (vpath, tpath, unused_transpose_args):
    from miriad import VisData
    from mirtask.util import mir2pbp32
    from . import visobjs
    vis = VisData (vpath)

    # Pass 1: build up list of basepols, times

    first = True
    nrecs = 0
    delays = None
    window = None
    fc = visobjs.FreqConfig ()
    times = set ()
    pbps = set ()
    visgen = vis.readLowlevel ('3', False)

    print ('pass 1 ...')

    for inp, pream, data, flags in visgen:
        t = pream[3]
        pbp = mir2pbp32 (inp, pream)
        nrecs += 1

        if first:
            ftrack = fc.makeTracker (inp)
            first = False

        if ftrack.updated ():
            fc.fill (inp)

            if fc.numSpectralWindows () != 1:
                die ('cannot transpose: need exactly one spectral window')

            idents = list (fc.fundamentalWinIdents ())
            newwindow = fc.windowFromIdent (idents[0])

            if window is not None and newwindow != window:
                die ('cannot transpose: frequency config changes inside dataset')

            window = newwindow

        if delays is None:
            nants = inp.getVarInt ('nants')
            dinfo = inp.probeVar ('delay0')

            if dinfo is None:
                delays = False
            elif dinfo[1] == 2 * nants:
                # An ATA extension: one fixed delay per antpol. Reshape
                # to be a bit more sensible
                delays = inp.getVarFloat ('delay0', 2 * nants)
                delays = delays.reshape ((2, nants)).T
            else:
                delays = inp.getVarFloat ('delay0', nants)
                delays = np.vstack ((delays, delays)).T

        times.add (t)
        pbps.add (pbp)

    # Get the timestamps onto a nice even grid, checking that our
    # gridding is decent.

    datatimes = np.asarray (sorted (times), dtype=np.double)
    nt = datatimes.size
    time0 = datatimes[0]
    cadence = np.median (datatimes[1:] - datatimes[:-1])
    tidxs = (datatimes - time0) / cadence
    timemap = np.empty (nt, dtype=np.int)
    nslot = int (round (tidxs[-1])) + 1
    scale = nslot * 1. / nt
    noff = 0

    for i in range (nt):
        timemap[i] = int (round (tidxs[i]))
        if (tidxs[i] - timemap[i]) > 0.01:
            noff += 1

    if noff > 0:
        warn ('had %d timestamps (out of %d) with poor '
              'mapping onto the grid', noff, nt)

    if scale > 1.05:
        warn ('data size increasing by factor of %.2f '
              'to get everything onto the time grid', scale)

    times = np.arange (nslot) * cadence + time0
    nt = nslot

    # Compute a few other things

    pbps = np.asarray (sorted (pbps), dtype=np.int32)
    nbp = pbps.size

    # Without the int(), nchan is a numpy.int32, the type of which
    # propagates along to various byte counts and offsets which end up
    # overflowing for sufficiently large datasets and causing
    # exceptions on negative values getting passed to the various
    # system calls used below.
    nchan = int (window.nchan)
    sdf = window.width / nchan
    sfreq = window.cfreq - 0.5 * (window.width - sdf)

    corr_bytes = 8 * nchan
    uvww_bytes = 4 * 8
    flag_bytes = nchan
    slice_bytes = (corr_bytes + flag_bytes + uvww_bytes) * nt
    dump_bytes = (corr_bytes + flag_bytes + uvww_bytes) * nbp

    nsimult = CACHE_SIZE // dump_bytes

    # Write out header info
    # Write-then-seek seems to break if buffering is used???

    data_offset = ((header.size + 7) // 8) * 8
    data_size = slice_bytes * nbp

    vars_offset = ((data_offset + data_size + 7) // 8) * 8

    f = open (tpath, 'w+', 0)
    f.truncate (vars_offset) # hint how big the file will be
    f.write (header.pack (BYTE_ORDER_MARKER,
                          FORMAT_VERSION,
                          nbp, nt, nchan,
                          sfreq, sdf,
                          time0, cadence,
                          data_offset,
                          vars_offset))

    # Pass 2: write data. Below we cast the tidx variables to ints for
    # the same reason as with nchan above.
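    # On-disk layout, as encoded by the offset helpers below: each basepol
    # owns a contiguous slice of slice_bytes holding all nt correlation
    # spectra, then all nt flag vectors, then all nt uvw+weight records.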

    def corr_offset (bpidx, tidx):
        return data_offset + bpidx * slice_bytes + corr_bytes * int (tidx)
    def flag_offset (bpidx, tidx):
        return (data_offset + bpidx * slice_bytes + corr_bytes * nt +
                flag_bytes * int (tidx))
    def uvww_offset (bpidx, tidx):
        return (data_offset + bpidx * slice_bytes + (corr_bytes + flag_bytes) * nt +
                uvww_bytes * int (tidx))

    lsts = np.empty (nt, dtype=np.double)

    corrs = np.empty ((nsimult, nbp, nchan), dtype=np.complex64)
    flags = np.empty ((nsimult, nbp, nchan), dtype=np.int8)
    uvwws = np.empty ((nsimult, nbp, 4), dtype=np.double)
    seen = np.empty ((nsimult, nbp), dtype=np.bool)
    lstbuf = np.empty (nsimult, dtype=np.double)

    empty_corr = np.zeros (nchan, dtype=np.complex64)
    empty_flags = np.zeros (nchan, dtype=np.int8)
    empty_uvww = np.zeros (4, dtype=np.double)

    # Progress reporting:
    unbufout = os.fdopen (os.dup (1), 'w', 0)
    currec = 0
    tstart = time.time ()
    tlastprint = 0

    def dump (curtimes):
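        # Flush the in-memory chunk: for each basepol, write out the
        # correlations, flags, and uvw+weight records for the time slots
        # gathered in curtimes, seeking only when slots are not consecutive.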
        nbatch = len (curtimes)

        tidxs = np.empty (nbatch, dtype=np.int)
        for time, sidx in iteritems(curtimes):
            tidxs[sidx] = timemap[datatimes.searchsorted (time)]
            lsts[tidxs[sidx]] = lstbuf[sidx]

        info = np.empty ((nbatch, 3), dtype=np.int)
        info[:,0] = tidxs.argsort ()
        info[:,1] = tidxs[info[:,0]]
        info[0,2] = 1

        for i in range (1, nbatch):
            info[i,2] = (info[i,1] != info[i-1,1] + 1)

        for bpidx in range (nbp):
            for sidx, tidx, seek in info:
                if seek:
                    f.seek (corr_offset (bpidx, tidx))
                if seen[sidx,bpidx]:
                    f.write (corrs[sidx,bpidx])
                else:
                    f.write (empty_corr)

            for sidx, tidx, seek in info:
                if seek:
                    f.seek (flag_offset (bpidx, tidx))
                if seen[sidx,bpidx]:
                    f.write (flags[sidx,bpidx])
                else:
                    f.write (empty_flags)

            for sidx, tidx, seek in info:
                if seek:
                    f.seek (uvww_offset (bpidx, tidx))
                if seen[sidx,bpidx]:
                    f.write (uvwws[sidx,bpidx])
                else:
                    f.write (empty_uvww)

    newchunk = True
    curtimes = {}
    nrec = nvis = 0
    seenany = np.zeros (nbp, dtype=np.bool)
    meanuvw = np.zeros ((nbp, 3), dtype=np.double)
    muvwcounts = np.zeros (nbp, dtype=np.int)
    visgen = vis.readLowlevel ('3', False)

    print ('pass 2 ...')

    for inp, pream, data, recflags in visgen:
        uvw = pream[:3]
        t = pream[3]
        pbp = mir2pbp32 (inp, pream)
        var = inp.getVariance ()
        if var == 0:
            var = 1.
        weight = 1. / var

        if currec % 500 == 0 and currec:
            now = time.time ()

            if now - tlastprint > 1:
                pct = 100. * currec / nrecs
                elapsed = now - tstart
                total = 1. * elapsed * nrecs / currec
                eta = total - elapsed

                msg = '   %3.1f%% (%d/%d) elapsed %s ETA %s total %s' % \
                    (pct, currec, nrecs, _sfmt (elapsed), _sfmt (eta), _sfmt (total))
                unbufout.write(msg.ljust (60).encode('utf8') + b'\r')
                tlastprint = now

        currec += 1

        if t not in curtimes and len (curtimes) == nsimult:
            msg = '   %3.1f%% (%d/%d) writing ...' % (pct, currec, nrecs)
            unbufout.write(msg.ljust (60).encode('utf8') + b'\r\n')
            dump (curtimes)
            newchunk = True

        if newchunk:
            curtimes = {}
            newchunk = False

        sidx = curtimes.get (t)

        if sidx is None:
            sidx = len (curtimes)
            curtimes[t] = sidx
            seen[sidx].fill (False)

        bpidx = pbps.searchsorted (pbp)

        seen[sidx,bpidx] = True
        uvwws[sidx,bpidx,:3] = uvw
        uvwws[sidx,bpidx,3] = weight
        corrs[sidx,bpidx] = data
        flags[sidx,bpidx] = recflags.astype (np.int8)
        lstbuf[sidx] = inp.getVarDouble ('lst')
        muvwcounts[bpidx] += 1
        meanuvw[bpidx] += uvw

        if recflags.any ():
            seenany[bpidx] = 1

        nrec += 1
        nvis += data.size

    if len (curtimes):
        msg = '   100%% (%d/%d) writing ...' % (currec, nrecs)
        unbufout.write(msg.ljust (60).encode('utf8') + b'\r')
        dump (curtimes)

    tfinish = time.time ()
    elapsed = tfinish - tstart
    print ('   100%% (%d/%d) elapsed %s ETA 0s total %s   ' % \
           (currec, nrecs, _sfmt (elapsed), _sfmt (elapsed)))
    unbufout.close ()

    # Finally, write out variables

    f.seek (vars_offset)
    savevariable (f, 'vispath', np.fromstring (str (vis), dtype=np.byte))
    savevariable (f, 'basepols', pbps)
    if delays is not False:
        savevariable (f, 'delays', delays)
    flaggedbps = pbps[np.where (seenany == 0)]
    savevariable (f, 'flaggedbps', flaggedbps)
    savevariable (f, 'lsts', lsts)

    wbad = np.where (muvwcounts == 0)
    muvwcounts[wbad] = 1
    meanuvw[:,0] /= muvwcounts # apparently broadcasting doesn't
    meanuvw[:,1] /= muvwcounts # do what you'd want here. Not sure
    meanuvw[:,2] /= muvwcounts # why, but it's only two extra lines.
    meanuvw[wbad] = 0
    # Take the mean across the spectral window, as well as in time:
    meanuvw *= window.cfreq / sfreq
    savevariable (f, 'meanuvws', meanuvw)

    f.close ()
    return nrec, nvis, data_size
Example #26
def omstyle(v):
    try:
        return getattr(om.styles, v)()
    except:
        cli.die('can\'t load/instantiate OmegaPlot style "%s"', v)
Example #27
def movie_cli(args):
    import cairo, subprocess, tempfile
    from pwkit.cli import die
    from pwkit.data_gui_helpers import Clipper, ColorMapper
    from pwkit.io import Path

    settings = make_movie_parser().parse_args(args=args)
    ii = IntegratedImages(settings.inpath)

    if settings.kind == 'rot':
        print('Rotation movie; non-movie freq choice is:',
              ii.freq_names[settings.index])
        cube = np.array(
            ii.rotmovie(settings.index, settings.stokes, yflip=True))
    elif settings.kind == 'spec':
        print('Spectrum movie; non-movie CML choice is:',
              ii.cmls[settings.index])
        cube = np.array(
            ii.specmovie(settings.index, settings.stokes, yflip=True))
    else:
        die('unrecognized movie type %r', settings.kind)

    if settings.crop != 0:
        c = settings.crop
        cube = cube[:, c:-c, c:-c]

    n, h, w = cube.shape

    s = settings.scaling
    h *= s
    w *= s
    scaled = np.empty((h, w), dtype=cube.dtype)
    tiled = scaled.reshape((h // s, s, w // s, s))
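    # Assigning a (h//s, 1, w//s, 1)-shaped plane into this view broadcasts
    # each pixel into an s-by-s block: nearest-neighbour upscaling without an
    # explicit loop.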

    stride = cairo.ImageSurface.format_stride_for_width(cairo.FORMAT_ARGB32, w)
    assert stride % 4 == 0  # stride is in bytes
    assert stride == 4 * w

    clipper = Clipper()
    clipper.alloc_buffer(scaled)
    clipper.set_tile_size()

    if settings.symmetrize:
        m = np.nanmax(np.abs(cube))
        clipper.dmin = -m
        clipper.dmax = m
    else:
        clipper.default_bounds(cube)

    mapper = ColorMapper(settings.colormap)
    mapper.alloc_buffer(scaled)
    mapper.set_tile_size()

    surface = cairo.ImageSurface.create_for_data(mapper.buffer,
                                                 cairo.FORMAT_ARGB32, w, h,
                                                 stride)

    tempdir = Path(tempfile.mkdtemp())
    argv = [
        'convert',
        '-delay',
        str(settings.delay),
        '-loop',
        '0',
    ]

    for i, plane in enumerate(cube):
        tiled[...] = plane.reshape((plane.shape[0], 1, plane.shape[1], 1))
        clipper.invalidate()
        clipper.ensure_all_updated(scaled)
        mapper.invalidate()
        mapper.ensure_all_updated(clipper.buffer)
        png = str(tempdir / ('%d.png' % i))
        surface.write_to_png(png)
        argv.append(png)

    argv += [settings.outpath]
    subprocess.check_call(argv, shell=False)
    tempdir.rmtree()
Example #28
def view_hdf5_cli(args):
    """XXX: huge code redundancy with "view cube". Whatever."""
    import h5py

    ap = argparse.ArgumentParser(prog='vernon view hdf5', )
    ap.add_argument('-s',
                    dest='stretch',
                    type=str,
                    nargs=1,
                    default=['default'],
                    choices='default sqrt neg'.split(),
                    help='What kind of stretch to use on the data.')
    ap.add_argument(
        '-p',
        dest='outer_plane_number',
        metavar='P',
        type=int,
        help='Isolate the outermost P\'th plane of the cube before viewing.')
    ap.add_argument('-T',
                    dest='transpose',
                    action='store_true',
                    help='Transpose the array before viewing.')
    ap.add_argument(
        '-f',
        dest='y_flip',
        action='store_true',
        help='Render the cube so that the first row is at the bottom.')
    ap.add_argument('FILE', metavar='HDF5-PATH', help='The HDF5 file to load')
    ap.add_argument('ITEMS',
                    nargs='+',
                    metavar='ITEM-NAMES',
                    help='The name of the item within the file to view')

    settings = ap.parse_args(args=args)
    stretch_spec = settings.stretch[0]

    if stretch_spec == 'default':
        stretch = lambda data: data
    elif stretch_spec == 'sqrt':

        def stretch(data):
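            # Signed square root: stretch the magnitude but preserve the
            # sign of each value.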
            neg = (data < 0)
            data[neg] *= -1
            data = np.sqrt(data)
            data[neg] *= -1
            return data
    elif stretch_spec == 'neg':
        stretch = lambda data: (data < 0).astype(np.int)
    else:
        cli.die('unknown stretch specification %r', stretch_spec)

    if settings.y_flip:
        y_slice = slice(None, None, -1)
    else:
        y_slice = slice(None, None)

    def describe(a):
        print('Final shape:', repr(a.shape))
        print('Min/max/med: %.16e  %.16e  %.16e' %
              (np.nanmin(a), np.nanmax(a), np.nanmedian(a)))
        print('# positive / # negative / # nonfinite: %d  %d  %d' %
              ((a > 0).sum(), (a < 0).sum(), (~np.isfinite(a)).sum()))
        return a  # convenience

    arrays = []

    with h5py.File(settings.FILE, 'r') as ds:
        for item in settings.ITEMS:
            a = ds[item][...]
            if settings.outer_plane_number is not None:
                a = a[settings.outer_plane_number]
            arrays.append(a)

    if len(arrays) > 1:
        a = np.stack(arrays)
    else:
        a = arrays[0]

    if settings.transpose:
        a = a.T

    if a.ndim == 2:
        stretched = stretch(describe(a))
        view(stretched[y_slice], yflip=settings.y_flip)
    elif a.ndim == 3:
        stretched = stretch(describe(a))
        cycle(stretched[:, y_slice], yflip=settings.y_flip)
    elif a.ndim == 4:
        print('Shape:', a.shape)
        for i in range(a.shape[0]):
            stretched = stretch(describe(a[i]))
            cycle(stretched[:, y_slice], yflip=settings.y_flip)
    else:
        cli.die('cannot handle %d-dimensional arrays', a.ndim)
Example #29
def grtrans_cli(settings):
    from pwkit import cgs
    from pwkit.cli import die
    from time import time
    from .impl import PhysicalApproximator, hardcoded_nu_ref, hardcoded_ne_ref

    # Read and validate the test dataset.

    testdata = pd.read_table(settings.testdata)

    psi = testdata.get('psi(meta)')
    if psi is None:
        die('the test dataset must contain a column of field-to-Stokes-U angles \"psi(meta)\"'
            )

    d = testdata.get('d(meta)')
    if d is None:
        die('the test dataset must contain a column of integration path lengths \"d(meta)\"'
            )

    n_e = testdata.get('n_e(meta)')
    if n_e is None:
        die('the test dataset must contain a column of particle densities \"n_e(meta)\"'
            )

    time_ms = testdata.get('time_ms(meta)')
    if time_ms is None:
        die('the test dataset must contain a column of computation times \"time_ms(meta)\"'
            )

    s = None
    theta = None
    others = {}

    for col in testdata.columns:
        if col.startswith('s('):
            s = testdata[col]
        elif col.startswith('theta('):
            theta = testdata[col]
        elif col.endswith('(lin)') or col.endswith('(log)'):
            others[col.split('(')[0]] = testdata[col]

    if s is None:
        die('the test dataset must have an input parameter of the harmonic number \"s\"'
            )

    if theta is None:
        die('the test dataset must have an input parameter of the field-to-LOS angle \"theta\"'
            )

    # Get the coefficients into physical units, packed in our standard format.

    nu_hz = settings.frequency * 1e9
    freq_scale = nu_hz / hardcoded_nu_ref
    n_e_scale = n_e / hardcoded_ne_ref
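    # Scale from the hardcoded reference values to physical units: emission
    # coefficients (j_*) scale up with frequency, absorption (alpha_*) and
    # Faraday (rho_*) coefficients scale down, and everything is linear in n_e.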

    coeffs = np.empty((psi.size, 8))
    coeffs[..., 0] = testdata['j_I(res)'] * freq_scale
    coeffs[..., 1] = testdata['alpha_I(res)'] / freq_scale
    coeffs[..., 2] = testdata['j_Q(res)'] * freq_scale
    coeffs[..., 3] = testdata['alpha_Q(res)'] / freq_scale
    coeffs[..., 4] = testdata['j_V(res)'] * freq_scale
    coeffs[..., 5] = testdata['alpha_V(res)'] / freq_scale
    coeffs[..., 6] = testdata['rho_Q(res)'] / freq_scale
    coeffs[..., 7] = testdata['rho_V(res)'] / freq_scale
    coeffs *= n_e_scale.values.reshape((-1, 1))

    # Ground truth:

    iquv_precise = integrate(d, coeffs, psi)
    ctime_precise = time_ms.sum()
    print(
        'Precise computation: I={:.4e}  Q={:.4e}  U={:.4e}  V={:.4e}  calc_time={:.0f} ms'
        .format(iquv_precise[0], iquv_precise[1], iquv_precise[2],
                iquv_precise[3], ctime_precise))

    # Now set up the approximator and do the same thing. (Note that often the
    # timing seems backwards, because the time spent doing the precise
    # calculation has already been spent, whereas we have a lot of overhead to
    # set up the neural networks.)

    B = 2 * np.pi * cgs.me * cgs.c * nu_hz / (s * cgs.e)
    approx = PhysicalApproximator(settings.nndir)
    t0 = time()
    coeffs, oos = approx.compute_all_nontrivial(nu_hz, B, n_e, theta, **others)
    ctime_approx = 1000 * (time() - t0)

    if np.any(oos != 0):
        print('WARNING: some of the approximations were out-of-sample')

    iquv_approx = integrate(d, coeffs, psi)
    print(
        'Approx. computation: I={:.4e}  Q={:.4e}  U={:.4e}  V={:.4e}  calc_time={:.0f} ms'
        .format(iquv_approx[0], iquv_approx[1], iquv_approx[2], iquv_approx[3],
                ctime_approx))

    acc = np.abs((iquv_approx - iquv_precise) / iquv_precise)
    print('Accuracy: I={:.3f}  Q={:.3f}  U={:.3f}  V={:.3f}'.format(
        acc[0], acc[1], acc[2], acc[3]))

    print('Speedup: {:.1f}'.format(ctime_precise / ctime_approx))
Example #30
def omstyle (v):
    try:
        return getattr (om.styles, v) ()
    except:
        cli.die ('can\'t load/instantiate OmegaPlot style "%s"', v)
Example #31
def doit (driver, args):
    # Load up the driver code

    try:
        text = open (driver).read ()
    except Exception as e:
        cli.die ('cannot read driver file "%s": %s', driver, e)

    try:
        code = compile (text, driver, 'exec')
    except Exception as e:
        if 'OMEGAFIG_BACKTRACE' in os.environ:
            raise
        cli.die ('cannot compile driver file "%s": %s', driver, e)

    ns = {'__file__': driver,
          '__name__': '__omegafig__'}

    try:
        exec (code, ns)
    except Exception as e:
        if 'OMEGAFIG_BACKTRACE' in os.environ:
            raise
        cli.die ('cannot execute driver file "%s": %s', driver, e)

    pfunc = ns.get ('plot')
    if pfunc is None:
        cli.die ('driver file "%s" does not provide a function called "plot"', driver)
    if not callable (pfunc):
        cli.die ('driver file "%s" provides something called "plot", but it\'s '
                 'not a function', driver)

    # Deal with args

    try:
        code = pfunc.__code__
    except AttributeError:
        code = pfunc.func_code

    nargs = code.co_argcount
    argnames = code.co_varnames

    keywords = []
    nonkeywords = []

    for arg in args:
        if '=' in arg:
            keywords.append (arg)
        else:
            nonkeywords.append (arg)

    if len (nonkeywords) != nargs:
        cli.die ('expected %d non-keyword arguments to driver, but got %d',
                 nargs, len (nonkeywords))

    config = Config ()
    defaults = ns.get ('figdefaults')

    if defaults is not None:
        for key in defaults:
            setattr (config, key, defaults[key])

    config.parse (keywords)

    # Set up omegaplot globals as appropriate

    if config.pango:
        try:
            import omega.pango_g3 as ompango
        except ImportError:
            import omega.pango_g2 as ompango

        fontparams = {}
        if config.pangofamily is not None:
            fontparams['family'] = config.pangofamily
        if config.pangosize is not None:
            fontparams['size'] = config.pangosize
        if len (fontparams):
            ompango.setFont (**fontparams)

    # Execute.

    p = pfunc (*nonkeywords)

    if config.out is None:
        p.show (style=config.omstyle)
    else:
        p.save (config.out, style=config.omstyle, dims=config.dims,
                margins=config.margin)
Example #32
def _ms_transpose (vpath, tpath, transpose_args, squash_time_gaps=False):
    from pwkit.environments.casa import util as casautil
    b = casautil.sanitize_unicode

    def vispath (*args):
        return b(os.path.join (vpath, *args))

    # TODO: I think that with ms.nrow() and ms.range() we can do this
    # while taking only one pass through the data.

    tb = casautil.tools.table ()
    ms = casautil.tools.ms ()
    print ('pass 1 ...')

    # Load polarization stuff we need

    tb.open (vispath ('DATA_DESCRIPTION'))
    ddid_to_pid = tb.getcol (b'POLARIZATION_ID')
    ddid_to_spwid = tb.getcol (b'SPECTRAL_WINDOW_ID')
    tb.close ()

    tb.open (vispath ('POLARIZATION'))
    numcorrs = tb.getcol (b'NUM_CORR')
    npids = numcorrs.size
    prodinfo = [None] * npids

    for i in range (npids):
        corrtypes = tb.getcell (b'CORR_TYPE', i)
        prodinfo[i] = [casautil.pol_to_miriad[c] for c in corrtypes]

    tb.close ()

    ddprods = [prodinfo[p] for p in ddid_to_pid]

    # Load spw configuration stuff we need. Don't grid the info yet
    # since many of the spws may be filtered out by the selection
    # setup.

    tb.open (vispath ('SPECTRAL_WINDOW'))
    nspws = tb.getcol (b'NUM_CHAN').size
    sfreqs = []

    for i in range (nspws):
        sfreqs.append (tb.getcell (b'CHAN_FREQ', i) * 1e-9) # Hz -> GHz

    tb.close ()

    # Antenna info

    tb.open (vispath ('ANTENNA'))
    nants = tb.getcol (b'DISH_DIAMETER').size
    names = tb.getcol (b'NAME')
    stations = tb.getcol (b'STATION')
    fullnames = []
    maxnamelen = 0

    for i in range (nants):
        f = '%s@%s' % (names[i], stations[i])
        fullnames.append (f)
        maxnamelen = max (maxnamelen, len (f))

    antnames = np.zeros ((nants, maxnamelen), dtype=np.byte)

    for i in range (nants):
        f = fullnames[i]
        n = len (f)
        antnames[i,:n] = np.fromstring (f, dtype=np.byte)

    # Open and set up filtering. msselect() says it supports
    # 'polarization' as a field, but it doesn't seem to do anything?

    ms.open (vispath ())
    ms_selectors = frozenset ('array baseline field observation polarization '
                              'scan scanintent spw taql time uvdist'.split ())
    mssel = dict (kv for kv in iteritems(transpose_args)
                  if kv[0] in ms_selectors)
    # ms.selectinit () needed for selectpolarization() below
    ms.msselect (b(mssel))

    # Changes shape of 'data' column below. Disable for now since
    # I don't feel like debugging it.
    if 'polarization' in transpose_args:
        warn ('polarization selection not implemented for MS data')
        pass #ms.selectpolarization (transpose_args['polarization'].split (','))

    # Get table of times and basepols

    ms.iterinit (maxrows=65536) # XXX semi-arbitrary constant
    ms.iterorigin ()
    colnames = b('time antenna1 antenna2 data_desc_id'.split ())
    nrecs = 0
    times = set ()
    pbps = set ()
    seenspws = set ()

    while True:
        cols = ms.getdata (items=colnames)
        # time is (chunksize)

        for i in range (cols['time'].size):
            t = cols['time'][i] / 86400. + 2400000.5 # CASA to miriad timesystems

            ddid = cols['data_desc_id'][i]

            pi = ddprods[ddid]
            a1 = cols['antenna1'][i] + 1 # 0-based -> 1-based
            a2 = cols['antenna2'][i] + 1

            seenspws.add (ddid_to_spwid[ddid])

            for j in range (len (pi)):
                nrecs += 1
                pbp = mtutil.bpToPBP32 (mtutil.aap2bp (a1, a2, pi[j]))
                times.add (t)
                pbps.add (pbp)

        if not ms.iternext ():
            break

    # Get the timestamps onto a nice even grid, checking that our
    # gridding is decent.

    datatimes = np.asarray (sorted (times), dtype=np.double)
    nt = datatimes.size
    time0 = datatimes[0]
    cadence = np.median (datatimes[1:] - datatimes[:-1])
    tidxs = (datatimes - time0) / cadence
    timemap = np.empty (nt, dtype=np.int)
    ntslot = int (round (tidxs[-1])) + 1
    tscale = ntslot * 1. / nt
    ntoff = 0

    if squash_time_gaps:
        slot_to_data = np.zeros (ntslot, dtype=np.int) - 1

    for i in range (nt):
        timemap[i] = int (round (tidxs[i]))
        if (tidxs[i] - timemap[i]) > 0.01:
            ntoff += 1

        if squash_time_gaps:
            slot_to_data[timemap[i]] = i

    if ntoff > 0:
        warn ('had %d timestamps (out of %d) with poor mapping onto the grid',
              ntoff, nt)

    if squash_time_gaps:
        # Re-index the data to remove time gaps. As a convenience we throw in
        # a small break between discrete observations.
        seen_any = False
        in_populated_run = False
        squashed_idx = 0
        new_gap_size = 1

        for i in range (ntslot):
            if slot_to_data[i] == -1:
                # There are no data for this slot.
                in_populated_run = False
            else:
                # There are data for this slot.
                if not in_populated_run and seen_any:
                    squashed_idx += new_gap_size
                timemap[slot_to_data[i]] = squashed_idx
                squashed_idx += 1
                seen_any = True
                in_populated_run = True

        ntslot = squashed_idx
        tscale = ntslot * 1. / nt

    if tscale > 1.05:
        warn ('data size increasing by factor of %.2f to get everything onto '
              'the time grid', tscale)

    nt = ntslot

    # Now do the same thing for the spectral windows that are actually used,
    # computing lookup info for fast mapping of DDID to our frequency grid.

    freqs = set ()

    for spwid in seenspws:
        freqs.update (sfreqs[spwid])

    datafreqs = np.asarray (sorted (freqs), dtype=np.double)
    nf = datafreqs.size
    freq0 = datafreqs[0]
    sdf = np.median (datafreqs[1:] - datafreqs[:-1])
    nfslot = int (round ((datafreqs[-1] - freq0) / sdf)) + 1
    fscale = nfslot * 1. / nf
    ddfreqmap = []
    nfoff = 0
    maxnchan = 0
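    # For each data-description ID, work out how its channels map onto the
    # global frequency grid: (starting grid index, number of channels,
    # +1/-1 channel ordering), or None if the spw never appears in the data.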

    for i in range (len (ddid_to_spwid)):
        spwid = ddid_to_spwid[i]
        if spwid not in seenspws:
            ddfreqmap.append (None)
            continue

        # If more than one DDID shares a SPWID, we're recomputing this stuff.
        # Oh well.

        ddfreqs = sfreqs[spwid]
        ddidx0 = None
        ddprevidx = None

        if ddfreqs.size > 1 and ddfreqs[1] < ddfreqs[0]:
            ddstep = -1
        else:
            ddstep = 1

        for j in range (ddfreqs.size):
            trueidx = (ddfreqs[j] - freq0) / sdf
            ddidx = int (round (trueidx))

            if (ddidx - trueidx) > 0.01:
                nfoff += 1

            if j == 0:
                ddidx0 = ddidx
            elif ddidx != ddprevidx + ddstep:
                die ('cannot transpose: spw must map directly onto freq grid '
                     '(spw #%d, chan %d->%d, %d->%d)', spwid, j - 1, j,
                     ddprevidx, ddidx)

            ddprevidx = ddidx

        if ddstep == -1:
            ddidx0 = ddidx

        ddfreqmap.append ((ddidx0, ddfreqs.size, ddstep))
        maxnchan = max (maxnchan, ddfreqs.size)

    if nfoff > 0:
        warn ('had %d frequencies (out of %d) with poor mapping onto the grid',
              nfoff, nf)

    if fscale > 1.05:
        warn ('data size increasing by factor of %.2f to get everything onto '
              'the frequency grid', fscale)

    freqs = np.arange (nfslot) * sdf + freq0
    nf = nfslot

    # Compute offsets and record sizes for our output file, and write
    # the header. Write-then-seek seems to break if buffering is used???

    pbps = np.asarray (sorted (pbps), dtype=np.int32)
    nbp = pbps.size

    corr_bytes = 8 * nf
    uvww_bytes = 4 * 8
    flag_bytes = nf
    slice_bytes = (corr_bytes + flag_bytes + uvww_bytes) * nt

    data_offset = ((header.size + 7) // 8) * 8
    data_size = slice_bytes * nbp

    vars_offset = ((data_offset + data_size + 7) // 8) * 8

    def corr_offset (bpidx, tidx, fidx):
        return data_offset + bpidx * slice_bytes + corr_bytes * tidx + 8 * fidx

    def flag_offset (bpidx, tidx, fidx):
        return (data_offset + bpidx * slice_bytes + corr_bytes * nt +
                flag_bytes * tidx + fidx)

    def uvww_offset (bpidx, tidx):
        return (data_offset + bpidx * slice_bytes + (corr_bytes + flag_bytes) * nt +
                uvww_bytes * tidx)

    f = open (tpath, 'wb+', 0)
    f.truncate (vars_offset) # hint how big the file will be
    f.write (header.pack (BYTE_ORDER_MARKER,
                          FORMAT_VERSION,
                          nbp, nt, nf,
                          freq0, sdf,
                          time0, cadence,
                          data_offset,
                          vars_offset))

    # Our little system for buffering/writing data. Given how the CASA Python
    # interface works, I don't think we can preallocate a huge buffer that
    # everything gets stuffed in. Which is sad. TODO: smarter data structure
    # that sorts the keys as we insert them.

    buffer_size = [0] # hack so we can modify value in the funcs below
    buffer_info = {}
    buffer_data = np.empty (CACHE_SIZE, dtype=np.uint8)
    currec = 0

    def dump ():
        if not len (buffer_info):
            return

        pct = 100. * currec / nrecs
        msg = '   %3.1f%% (%d/%d) writing ...' % (pct, currec, nrecs)
        unbufout.write(msg.ljust (60).encode('utf8') + b'\r')

        offsets = sorted (iterkeys(buffer_info))
        curofs = None

        for offset in offsets:
            bofs, blen = buffer_info[offset]

            if curofs is None or offset != curofs:
                f.seek (offset)
            f.write (buffer_data[bofs:bofs+blen])

            curofs = offset + blen

        buffer_size[0] = 0
        buffer_info.clear ()

    def bufferview (offset, dtype, nelem):
        bofs = (buffer_size[0] + 7) & (~7) # align for safety
        blen = dtype ().nbytes * nelem

        if bofs + blen > CACHE_SIZE:
            dump ()
            bofs = 0

        # if paranoid, check that offset not already in buffer_data
        buffer_size[0] = bofs + blen
        buffer_info[offset] = (bofs, blen)
        return buffer_data[bofs:bofs+blen].view (dtype)

    # Pass 2: write data. Set up some stuff for progress reporting.
    # NOTE: we're going to keep on rewriting uvw once for each spw

    print ('pass 2 ...')

    unbufout = os.fdopen (os.dup (1), 'wb', 0)
    tstart = time.time ()
    tlastprint = 0
    nvis = 0
    seenany = np.zeros (nbp, dtype=np.bool)
    meanuvw = np.zeros ((nbp, 3), dtype=np.double)
    muvwcounts = np.zeros (nbp, dtype=np.int)

    datacol = transpose_args.get ('datacol', 'data')
    colnames = b([datacol] +
                 'time antenna1 antenna2 data_desc_id flag uvw sigma'.split ())
    maxrows = CACHE_SIZE // (2 * maxnchan * 16) # 128 bits per viz.; factor of 2 safety margin
    ms.iterinit (maxrows=maxrows)
    ms.iterorigin ()

    while True:
        cols = ms.getdata (items=colnames)
        # flag and data are (npol, nchan, chunksize)
        # uvw is (3, chunksize)
        # sigma is (npol, chunksize)
        # rest are scalars, shape (chunksize)
        # data is complex128!!! converting is super slow and sad :-(

        data = cols[datacol]
        flags = cols['flag']

        for i in range (cols['time'].size):
            t = cols['time'][i] / 86400. + 2400000.5 # CASA to miriad timesystems
            tidx = timemap[datatimes.searchsorted (t)]
            ddid = cols['data_desc_id'][i]
            pi = ddprods[ddid]
            npol = len (pi)
            a1 = cols['antenna1'][i] + 1 # 0-based -> 1-based
            a2 = cols['antenna2'][i] + 1
            freqidx0, nchan, step = ddfreqmap[ddid]

            if currec % 100 == 0 and currec:
                now = time.time ()

                if now - tlastprint > 1:
                    pct = 100. * currec / nrecs
                    elapsed = now - tstart
                    total = 1. * elapsed * nrecs / currec
                    eta = total - elapsed

                    msg = '   %3.1f%% (%d/%d) elapsed %s ETA %s total %s' % \
                        (pct, currec, nrecs, _sfmt (elapsed), _sfmt (eta), _sfmt (total))
                    unbufout.write(msg.ljust (60).encode('utf8') + b'\r')
                    tlastprint = now

            nvis += npol * nchan

            for j in range (npol):
                currec += 1
                pbp = mtutil.bpToPBP32 (mtutil.aap2bp (a1, a2, pi[j]))
                bpidx = pbps.searchsorted (pbp)

                uvww = bufferview (uvww_offset (bpidx, tidx), np.double, 4)
                uvww[:3] = cols['uvw'][:,i] * casautil.INVERSE_C_MNS
                uvww[3] = cols['sigma'][j,i]**-2
                muvwcounts[bpidx] += 1
                meanuvw[bpidx] += uvww[:3]

                corrdata = bufferview (corr_offset (bpidx, tidx, freqidx0),
                                       np.complex64, nchan)
                corrdata[:] = data[j,::step,i] # copy and convert

                flagdata = bufferview (flag_offset (bpidx, tidx, freqidx0),
                                       np.uint8, nchan)
                np.logical_not (flags[j,::step,i], flagdata)

                if flagdata.any ():
                    seenany[bpidx] = 1

        if not ms.iternext ():
            break

    dump ()

    tfinish = time.time ()
    elapsed = tfinish - tstart
    print ('   100%% (%d/%d) elapsed %s ETA 0s total %s   ' %
           (currec, nrecs, _sfmt (elapsed), _sfmt (elapsed)))
    unbufout.close ()

    # Finally, write out variables

    f.seek (vars_offset)
    savevariable (f, 'vispath', np.fromstring (b(vpath), dtype=np.byte))
    savevariable (f, 'basepols', pbps)
    savevariable (f, 'antnames', antnames)
    flaggedbps = pbps[np.where (seenany == 0)]
    savevariable (f, 'flaggedbps', flaggedbps)
    s = ' '.join ('%s=%s' % t for t in iteritems(transpose_args))
    savevariable (f, 'transargs', np.fromstring (b(s), dtype=np.byte))

    wbad = np.where (muvwcounts == 0)
    muvwcounts[wbad] = 1
    meanuvw[:,0] /= muvwcounts # see _mir_transpose ()
    meanuvw[:,1] /= muvwcounts
    meanuvw[:,2] /= muvwcounts
    meanuvw[wbad] = 0
    meanuvw *= (freq0 + 0.5 * sdf * nf) / freq0
    savevariable (f, 'meanuvws', meanuvw)

    f.close ()
    ms.close ()
    return currec, nvis, data_size