Example #1
def get_lines (path):
    try:
        with io.open (path, 'rt') as f:
            for line in f:
                yield line.strip ()
    except Exception as e:
        warn ('failed to read from "%s": %s', path, e)
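Because the try/except wraps the whole loop, a failure to open or read the file just emits a warning and ends the iteration, so callers can treat a missing file as an empty sequence. A minimal usage sketch (the path and handler are made up for illustration):

for line in get_lines ('tasks.txt'):
    if line and not line.startswith ('#'):
        handle_task (line)  # hypothetical per-line handler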
Example #2
def get_one_line (path):
    try:
        with io.open (path, 'rt') as f:
            return f.readline ().strip ()
    except Exception as e:
        warn ('failed to read a line from "%s": %s', path, e)
        return None
Example #3
def get_sacct_info (jobid, itemname):
    try:
        with io.open (os.devnull, 'rb') as devnull:
            info = subprocess.check_output (['sacct', '-j', str (jobid) + '.batch',
                                             '-n', '-P', '-o', itemname],
                                            shell=False, stdin=devnull, close_fds=True)
            return info.splitlines ()
    except Exception as e:
        warn ('failed to get sacct item "%s" for job %s: %s', itemname, jobid, e)
        return None
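On Python 3, the explicit os.devnull handling can be dropped in favor of subprocess.DEVNULL (available since 3.3). A sketch of the equivalent call, keeping the same sacct command line and the warn helper from above:

import subprocess

def get_sacct_info_py3 (jobid, itemname):
    # Same query as above; subprocess.DEVNULL replaces the hand-opened
    # os.devnull handle.
    try:
        info = subprocess.check_output (['sacct', '-j', str (jobid) + '.batch',
                                         '-n', '-P', '-o', itemname],
                                        stdin=subprocess.DEVNULL)
        return info.splitlines ()
    except Exception as e:
        warn ('failed to get sacct item "%s" for job %s: %s', itemname, jobid, e)
        return None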
Example #4
def count_lines (path):
    try:
        n = 0
        with io.open (path, 'rt') as f:
            for line in f:
                n += 1
        return n
    except Exception as e:
        warn ('failed to count lines of "%s": %s', path, e)
        return 0
Example #5
    def getDelay (self, bp):
        from mirtask.util import POL_XX, POL_YY, POL_XY, POL_YX
        ant1, ant2, pol = bp2aap (bp)
        delays = self.vars['delays']

        if max (ant1, ant2) > delays.shape[0]:
            warn ('not enough antennas in delays array!')
            return 0

        if pol == POL_XX:
            pidx1, pidx2 = 0, 0
        elif pol == POL_YY:
            pidx1, pidx2 = 1, 1
        elif pol == POL_XY:
            pidx1, pidx2 = 0, 1
        elif pol == POL_YX:
            pidx1, pidx2 = 1, 0
        else:
            warn ('not sure what to do with this pol for delays')
            pidx1, pidx2 = 0, 0

        return delays[ant2-1,pidx2] - delays[ant1-1,pidx1]
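The if/elif ladder above is a fixed lookup table, so it could equally be written as a dict keyed on the polarization code. A sketch, assuming the same POL_* constants are in scope:

# Feed-index pair for each polarization code (illustrative alternative).
POL_FEED_INDICES = {
    POL_XX: (0, 0),
    POL_YY: (1, 1),
    POL_XY: (0, 1),
    POL_YX: (1, 0),
}

try:
    pidx1, pidx2 = POL_FEED_INDICES[pol]
except KeyError:
    warn ('not sure what to do with this pol for delays')
    pidx1, pidx2 = 0, 0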
Example #6
def get_max_worker_maxrss ():
    maxrss = 0

    for wjobid in get_lines ('worker-arraymasterids'):
        if not len (wjobid):
            continue

        lines = get_sacct_info (wjobid, 'MaxRSS')
        if lines is None:
            continue

        for line in lines:
            line = line.strip ()
            if not len (line):
                continue

            if line[-1] == 'K':
                maxrss = max (maxrss, int (line[:-1]))
            elif line[-1] == 'M':
                maxrss = max (maxrss, int (round (float (line[:-1]) * 1024)))
            else:
                warn ('unexpected sacct MaxRSS output for job %s: %r', wjobid, line)

    return maxrss
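The suffix handling can be factored into a small helper that normalizes sacct's MaxRSS strings to kibibytes. A sketch, which also guesses at a 'G' suffix that large jobs might produce (not something the original handles):

def parse_maxrss_kib (text):
    # Convert a sacct MaxRSS string ('12345K', '1.5M', ...) to KiB,
    # returning None for anything unrecognized. Illustrative only.
    text = text.strip ()
    if not text:
        return None
    scales = {'K': 1, 'M': 1024, 'G': 1024 * 1024}
    scale = scales.get (text[-1])
    if scale is None:
        return None
    return int (round (float (text[:-1]) * scale))

# e.g. parse_maxrss_kib ('1.5M') == 1536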
Example #7
# Nested helper: the onerror callback for shutil.rmtree(); 'self' comes
# from the enclosing method's scope.
def on_error (func, path, exc_info):
    warn ('couldn\'t rmtree %s: in %s of %s: %s', self, func.__name__,
          path, exc_info[1])
Example #8
    def solve (self):
        if self.nants > self.nsamps:
            cli.warn ('not enough measurements to solve: %d ants, %d samples'
                      % (self.nants, self.nsamps))
            return

        # First solve for (log) amplitudes, which we can do as a classic
        # linear least squares problem (in log space). We're implicitly
        # modeling the source as 1+0j on all baselines, i.e., we're assuming a
        # point source and solving for amplitudes in units of the source flux
        # density.

        lna_A = np.zeros ((self.nsamps, self.nants))

        for i in range (self.nsamps):
            i1, i2 = self.blidxs[i]
            lna_A[i,i1] = 1
            lna_A[i,i2] = 1

        lna_b = np.log (np.abs (self.vis))
        lna_x, lna_chisq, lna_rank, lna_sing = np.linalg.lstsq (lna_A, lna_b)
        lna_chisq = lna_chisq[0]

        # We just solved for the per-antenna log amplitudes that model the
        # visibilities; to bring the observed visibilities into the model
        # domain, we need the inverses of those amplitudes. We can then
        # normalize the amplitudes of all of the observed visibilities.

        amps = np.exp (-lna_x)
        normvis = self.vis.copy ()

        for i in range (self.nsamps):
            i1, i2 = self.blidxs[i]
            normvis[i] *= amps[i1] * amps[i2]

        # Now, solve for the phases with a bespoke (but simple) iterative
        # algorithm. For each antenna we just compute the phase of the summed
        # differences between it and the "model" and alter the phase by that.
        # Loosely modeled on MIRIAD gpcal PhaseSol().

        curphasors = np.ones (self.nants, dtype=np.complex)
        newphasors = np.empty (self.nants, dtype=np.complex)
        tol = 1e-5
        damping = 0.9

        for iter_num in range (100):
            newphasors.fill (0)

            for i, vis in enumerate (normvis):
                i1, i2 = self.blidxs[i]
                newphasors[i1] += curphasors[i2] * vis
                newphasors[i2] += curphasors[i1] * np.conj (vis)

            newphasors /= np.abs (newphasors)
            temp = curphasors + damping * (newphasors - curphasors)
            temp /= np.abs (temp)
            delta = (np.abs (temp - curphasors)**2).mean ()
            #print ('ZZ', iter_num, delta, np.angle (temp, deg=True))
            curphasors = temp

            if delta < tol:
                break

        # Calibrate out phases too

        np.conj (curphasors, curphasors)
        gains = amps * curphasors

        for i in range (self.nsamps):
            i1, i2 = self.blidxs[i]
            normvis[i] *= curphasors[i1] * np.conj (curphasors[i2])

        self.gains = gains
        self.normvis = normvis
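The amplitude stage of solve() can be sanity-checked in isolation: build the same design matrix for a few baselines, take logs of the visibility amplitudes, and confirm that lstsq recovers the per-antenna gains. A self-contained sketch on synthetic data (the numbers are invented):

import numpy as np

# Three antennas and the three baselines between them.
blidxs = [(0, 1), (0, 2), (1, 2)]
true_amps = np.array ([1., 2., 4.])

# With a 1+0j model source, |vis| on baseline (i,j) is amp_i * amp_j.
vis = np.array ([true_amps[i] * true_amps[j] for i, j in blidxs])

A = np.zeros ((len (blidxs), 3))
for row, (i, j) in enumerate (blidxs):
    A[row,i] = A[row,j] = 1  # log|vis| = log amp_i + log amp_j

x = np.linalg.lstsq (A, np.log (vis), rcond=None)[0]
print (np.exp (x))  # ~ [1. 2. 4.]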
Example #9
def _ms_transpose (vpath, tpath, transpose_args, squash_time_gaps=False):
    from pwkit.environments.casa import util as casautil
    b = casautil.sanitize_unicode

    def vispath (*args):
        return b(os.path.join (vpath, *args))

    # TODO: I think that with ms.nrow() and ms.range() we can do this
    # while taking only one pass through the data.

    tb = casautil.tools.table ()
    ms = casautil.tools.ms ()
    print ('pass 1 ...')

    # Load polarization stuff we need

    tb.open (vispath ('DATA_DESCRIPTION'))
    ddid_to_pid = tb.getcol (b'POLARIZATION_ID')
    ddid_to_spwid = tb.getcol (b'SPECTRAL_WINDOW_ID')
    tb.close ()

    tb.open (vispath ('POLARIZATION'))
    numcorrs = tb.getcol (b'NUM_CORR')
    npids = numcorrs.size
    prodinfo = [None] * npids

    for i in range (npids):
        corrtypes = tb.getcell (b'CORR_TYPE', i)
        prodinfo[i] = [casautil.pol_to_miriad[c] for c in corrtypes]

    tb.close ()

    ddprods = [prodinfo[p] for p in ddid_to_pid]

    # Load spw configuration stuff we need. Don't grid the info yet
    # since many of the spws may be filtered out by the selection
    # setup.

    tb.open (vispath ('SPECTRAL_WINDOW'))
    nspws = tb.getcol (b'NUM_CHAN').size
    sfreqs = []

    for i in range (nspws):
        sfreqs.append (tb.getcell (b'CHAN_FREQ', i) * 1e-9) # Hz -> GHz

    tb.close ()

    # Antenna info

    tb.open (vispath ('ANTENNA'))
    nants = tb.getcol (b'DISH_DIAMETER').size
    names = tb.getcol (b'NAME')
    stations = tb.getcol (b'STATION')
    fullnames = []
    maxnamelen = 0

    for i in range (nants):
        f = '%s@%s' % (names[i], stations[i])
        fullnames.append (f)
        maxnamelen = max (maxnamelen, len (f))

    antnames = np.zeros ((nants, maxnamelen), dtype=np.byte)

    for i in range (nants):
        f = fullnames[i]
        n = len (f)
        antnames[i,:n] = np.fromstring (f, dtype=np.byte)

    # Open and set up filtering. msselect() says it supports
    # 'polarization' as a field, but it doesn't seem to do anything?

    ms.open (vispath ())
    ms_selectors = frozenset ('array baseline field observation polarization '
                              'scan scanintent spw taql time uvdist'.split ())
    mssel = dict (kv for kv in iteritems(transpose_args)
                  if kv[0] in ms_selectors)
    # ms.selectinit () needed for selectpolarization() below
    ms.msselect (b(mssel))

    # Changes shape of 'data' column below. Disable for now since
    # I don't feel like debugging it.
    if 'polarization' in transpose_args:
        warn ('polarization selection not implemented for MS data')
        pass #ms.selectpolarization (transpose_args['polarization'].split (','))

    # Get table of times and basepols

    ms.iterinit (maxrows=65536) # XXX semi-arbitrary constant
    ms.iterorigin ()
    colnames = b('time antenna1 antenna2 data_desc_id'.split ())
    nrecs = 0
    times = set ()
    pbps = set ()
    seenspws = set ()

    while True:
        cols = ms.getdata (items=colnames)
        # time is (chunksize)

        for i in range (cols['time'].size):
            t = cols['time'][i] / 86400. + 2400000.5 # CASA to miriad timesystems

            ddid = cols['data_desc_id'][i]

            pi = ddprods[ddid]
            a1 = cols['antenna1'][i] + 1 # 0-based -> 1-based
            a2 = cols['antenna2'][i] + 1

            seenspws.add (ddid_to_spwid[ddid])

            for j in range (len (pi)):
                nrecs += 1
                pbp = mtutil.bpToPBP32 (mtutil.aap2bp (a1, a2, pi[j]))
                times.add (t)
                pbps.add (pbp)

        if not ms.iternext ():
            break

    # Get the timestamps onto a nice even grid, checking that our
    # gridding is decent.

    datatimes = np.asarray (sorted (times), dtype=np.double)
    nt = datatimes.size
    time0 = datatimes[0]
    cadence = np.median (datatimes[1:] - datatimes[:-1])
    tidxs = (datatimes - time0) / cadence
    timemap = np.empty (nt, dtype=np.int)
    ntslot = int (round (tidxs[-1])) + 1
    tscale = ntslot * 1. / nt
    ntoff = 0

    if squash_time_gaps:
        slot_to_data = np.zeros (ntslot, dtype=np.int) - 1

    for i in range (nt):
        timemap[i] = int (round (tidxs[i]))
        if abs (tidxs[i] - timemap[i]) > 0.01:
            ntoff += 1

        if squash_time_gaps:
            slot_to_data[timemap[i]] = i

    if ntoff > 0:
        warn ('had %d timestamps (out of %d) with poor mapping onto the grid',
              ntoff, nt)

    if squash_time_gaps:
        # Re-index the data to remove time gaps. As a convenience we throw in
        # a small break between discrete observations.
        seen_any = False
        in_populated_run = False
        squashed_idx = 0
        new_gap_size = 1

        for i in range (ntslot):
            if slot_to_data[i] == -1:
                # There are no data for this slot.
                in_populated_run = False
            else:
                # There are data for this slot.
                if not in_populated_run and seen_any:
                    squashed_idx += new_gap_size
                timemap[slot_to_data[i]] = squashed_idx
                squashed_idx += 1
                seen_any = True
                in_populated_run = True

        ntslot = squashed_idx
        tscale = ntslot * 1. / nt

    if tscale > 1.05:
        warn ('data size increasing by factor of %.2f to get everything onto '
              'the time grid', tscale)

    nt = ntslot

    # Now do the same thing for the spectral windows that are actually used,
    # computing lookup info for fast mapping of DDID to our frequency grid.

    freqs = set ()

    for spwid in seenspws:
        freqs.update (sfreqs[spwid])

    datafreqs = np.asarray (sorted (freqs), dtype=np.double)
    nf = datafreqs.size
    freq0 = datafreqs[0]
    sdf = np.median (datafreqs[1:] - datafreqs[:-1])
    nfslot = int (round ((datafreqs[-1] - freq0) / sdf)) + 1
    fscale = nfslot * 1. / nf
    ddfreqmap = []
    nfoff = 0
    maxnchan = 0

    for i in range (len (ddid_to_spwid)):
        spwid = ddid_to_spwid[i]
        if spwid not in seenspws:
            ddfreqmap.append (None)
            continue

        # If more than one DDID shares a SPWID, we're recomputing this stuff.
        # Oh well.

        ddfreqs = sfreqs[spwid]
        ddidx0 = None
        ddprevidx = None

        if ddfreqs.size > 1 and ddfreqs[1] < ddfreqs[0]:
            ddstep = -1
        else:
            ddstep = 1

        for j in range (ddfreqs.size):
            trueidx = (ddfreqs[j] - freq0) / sdf
            ddidx = int (round (trueidx))

            if abs (ddidx - trueidx) > 0.01:
                nfoff += 1

            if j == 0:
                ddidx0 = ddidx
            elif ddidx != ddprevidx + ddstep:
                die ('cannot transpose: spw must map directly onto freq grid '
                     '(spw #%d, chan %d->%d, %d->%d)', spwid, j - 1, j,
                     ddprevidx, ddidx)

            ddprevidx = ddidx

        if ddstep == -1:
            ddidx0 = ddidx

        ddfreqmap.append ((ddidx0, ddfreqs.size, ddstep))
        maxnchan = max (maxnchan, ddfreqs.size)

    if nfoff > 0:
        warn ('had %d frequencies (out of %d) with poor mapping onto the grid',
              nfoff, nf)

    if fscale > 1.05:
        warn ('data size increasing by factor of %.2f to get everything onto '
              'the frequency grid', fscale)

    freqs = np.arange (nfslot) * sdf + freq0
    nf = nfslot

    # Compute offsets and record sizes for our output file, and write
    # the header. Write-then-seek seems to break if buffering is used???

    pbps = np.asarray (sorted (pbps), dtype=np.int32)
    nbp = pbps.size

    corr_bytes = 8 * nf
    uvww_bytes = 4 * 8
    flag_bytes = nf
    slice_bytes = (corr_bytes + flag_bytes + uvww_bytes) * nt

    data_offset = ((header.size + 7) // 8) * 8
    data_size = slice_bytes * nbp

    vars_offset = ((data_offset + data_size + 7) // 8) * 8

    def corr_offset (bpidx, tidx, fidx):
        return data_offset + bpidx * slice_bytes + corr_bytes * tidx + 8 * fidx

    def flag_offset (bpidx, tidx, fidx):
        return (data_offset + bpidx * slice_bytes + corr_bytes * nt +
                flag_bytes * tidx + fidx)

    def uvww_offset (bpidx, tidx):
        return (data_offset + bpidx * slice_bytes + (corr_bytes + flag_bytes) * nt +
                uvww_bytes * tidx)
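
    # Layout check with illustrative numbers (not from any real dataset):
    # nf = 4 and nt = 10 give corr_bytes = 32, flag_bytes = 4 and
    # uvww_bytes = 32, so slice_bytes = (32 + 4 + 32) * 10 = 680. Each
    # baseline-pol's slice packs all of its correlations first, then all
    # of its flags, then all of its uvww records, which is exactly what
    # the three offset helpers above encode.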

    f = open (tpath, 'wb+', 0)
    f.truncate (vars_offset) # hint how big the file will be
    f.write (header.pack (BYTE_ORDER_MARKER,
                          FORMAT_VERSION,
                          nbp, nt, nf,
                          freq0, sdf,
                          time0, cadence,
                          data_offset,
                          vars_offset))

    # Our little system for buffering/writing data. Given how the CASA Python
    # interface works, I don't think we can preallocate a huge buffer that
    # everything gets stuffed in. Which is sad. TODO: smarter data structure
    # that sorts the keys as we insert them.

    buffer_size = [0] # hack so we can modify value in the funcs below
    buffer_info = {}
    buffer_data = np.empty (CACHE_SIZE, dtype=np.uint8)
    currec = 0

    def dump ():
        if not len (buffer_info):
            return

        pct = 100. * currec / nrecs
        msg = '   %3.1f%% (%d/%d) writing ...' % (pct, currec, nrecs)
        unbufout.write(msg.ljust (60).encode('utf8') + b'\r')

        offsets = sorted (iterkeys(buffer_info))
        curofs = None

        for offset in offsets:
            bofs, blen = buffer_info[offset]

            if curofs is None or offset != curofs:
                f.seek (offset)
            f.write (buffer_data[bofs:bofs+blen])

            curofs = offset + blen

        buffer_size[0] = 0
        buffer_info.clear ()

    def bufferview (offset, dtype, nelem):
        bofs = (buffer_size[0] + 7) & (~7) # align for safety
        blen = dtype ().nbytes * nelem

        if bofs + blen > CACHE_SIZE:
            dump ()
            bofs = 0

        # if paranoid, check that offset not already in buffer_data
        buffer_size[0] = bofs + blen
        buffer_info[offset] = (bofs, blen)
        return buffer_data[bofs:bofs+blen].view (dtype)

    # Pass 2: write data. Set up some stuff for progress reporting.
    # NOTE: we're going to keep on rewriting uvw once for each spw

    print ('pass 2 ...')

    unbufout = os.fdopen (os.dup (1), 'wb', 0)
    tstart = time.time ()
    tlastprint = 0
    nvis = 0
    seenany = np.zeros (nbp, dtype=np.bool)
    meanuvw = np.zeros ((nbp, 3), dtype=np.double)
    muvwcounts = np.zeros (nbp, dtype=np.int)

    datacol = transpose_args.get ('datacol', 'data')
    colnames = b([datacol] +
                 'time antenna1 antenna2 data_desc_id flag uvw sigma'.split ())
    maxrows = CACHE_SIZE // (2 * maxnchan * 16) # 128 bits per viz.; factor of 2 safety margin
    ms.iterinit (maxrows=maxrows)
    ms.iterorigin ()

    while True:
        cols = ms.getdata (items=colnames)
        # flag and data are (npol, nchan, chunksize)
        # uvw is (3, chunksize)
        # sigma is (npol, chunksize)
        # rest are scalars, shape (chunksize)
        # data is complex128!!! converting is super slow and sad :-(

        data = cols[datacol]
        flags = cols['flag']

        for i in range (cols['time'].size):
            t = cols['time'][i] / 86400. + 2400000.5 # CASA to miriad timesystems
            tidx = timemap[datatimes.searchsorted (t)]
            ddid = cols['data_desc_id'][i]
            pi = ddprods[ddid]
            npol = len (pi)
            a1 = cols['antenna1'][i] + 1 # 0-based -> 1-based
            a2 = cols['antenna2'][i] + 1
            freqidx0, nchan, step = ddfreqmap[ddid]

            if currec % 100 == 0 and currec:
                now = time.time ()

                if now - tlastprint > 1:
                    pct = 100. * currec / nrecs
                    elapsed = now - tstart
                    total = 1. * elapsed * nrecs / currec
                    eta = total - elapsed

                    msg = '   %3.1f%% (%d/%d) elapsed %s ETA %s total %s' % \
                        (pct, currec, nrecs, _sfmt (elapsed), _sfmt (eta), _sfmt (total))
                    unbufout.write(msg.ljust (60).encode('utf8') + b'\r')
                    tlastprint = now

            nvis += npol * nchan

            for j in range (npol):
                currec += 1
                pbp = mtutil.bpToPBP32 (mtutil.aap2bp (a1, a2, pi[j]))
                bpidx = pbps.searchsorted (pbp)

                uvww = bufferview (uvww_offset (bpidx, tidx), np.double, 4)
                uvww[:3] = cols['uvw'][:,i] * casautil.INVERSE_C_MNS
                uvww[3] = cols['sigma'][j,i]**-2
                muvwcounts[bpidx] += 1
                meanuvw[bpidx] += uvww[:3]

                corrdata = bufferview (corr_offset (bpidx, tidx, freqidx0),
                                       np.complex64, nchan)
                corrdata[:] = data[j,::step,i] # copy and convert

                flagdata = bufferview (flag_offset (bpidx, tidx, freqidx0),
                                       np.uint8, nchan)
                np.logical_not (flags[j,::step,i], flagdata)

                if flagdata.any ():
                    seenany[bpidx] = 1

        if not ms.iternext ():
            break

    dump ()

    tfinish = time.time ()
    elapsed = tfinish - tstart
    print ('   100%% (%d/%d) elapsed %s ETA 0s total %s   ' %
           (currec, nrecs, _sfmt (elapsed), _sfmt (elapsed)))
    unbufout.close ()

    # Finally, write out variables

    f.seek (vars_offset)
    savevariable (f, 'vispath', np.fromstring (b(vpath), dtype=np.byte))
    savevariable (f, 'basepols', pbps)
    savevariable (f, 'antnames', antnames)
    flaggedbps = pbps[np.where (seenany == 0)]
    savevariable (f, 'flaggedbps', flaggedbps)
    s = ' '.join ('%s=%s' % t for t in iteritems(transpose_args))
    savevariable (f, 'transargs', np.fromstring (b(s), dtype=np.byte))

    wbad = np.where (muvwcounts == 0)
    muvwcounts[wbad] = 1
    meanuvw[:,0] /= muvwcounts # see _mir_transpose ()
    meanuvw[:,1] /= muvwcounts
    meanuvw[:,2] /= muvwcounts
    meanuvw[wbad] = 0
    meanuvw *= (freq0 + 0.5 * sdf * nf) / freq0
    savevariable (f, 'meanuvws', meanuvw)

    f.close ()
    ms.close ()
    return currec, nvis, data_size
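Both transpose implementations regularize the time axis the same way: take the median inter-sample gap as the cadence and round each timestamp to its nearest slot, warning when the rounding is poor. A stripped-down sketch with invented timestamps:

import numpy as np

# Slightly jittery timestamps with one gap in the middle.
datatimes = np.array ([0., 1.01, 2., 2.99, 6.])
cadence = np.median (datatimes[1:] - datatimes[:-1])  # robust to the gap
tidxs = (datatimes - datatimes[0]) / cadence
timemap = np.round (tidxs).astype (int)  # slot index for each timestamp
nslot = timemap[-1] + 1                  # grid length, gaps included
# timemap -> [0 1 2 3 6], nslot -> 7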
Example #10
def _mir_transpose (vpath, tpath, unused_transpose_args):
    from miriad import VisData
    from mirtask.util import mir2pbp32
    from . import visobjs
    vis = VisData (vpath)

    # Pass 1: build up list of basepols, times

    first = True
    nrecs = 0
    delays = None
    window = None
    fc = visobjs.FreqConfig ()
    times = set ()
    pbps = set ()
    visgen = vis.readLowlevel ('3', False)

    print ('pass 1 ...')

    for inp, pream, data, flags in visgen:
        t = pream[3]
        pbp = mir2pbp32 (inp, pream)
        nrecs += 1

        if first:
            ftrack = fc.makeTracker (inp)
            first = False

        if ftrack.updated ():
            fc.fill (inp)

            if fc.numSpectralWindows () != 1:
                die ('cannot transpose: need exactly one spectral window')

            idents = list (fc.fundamentalWinIdents ())
            newwindow = fc.windowFromIdent (idents[0])

            if window is not None and newwindow != window:
                die ('cannot transpose: frequency config changes inside dataset')

            window = newwindow

        if delays is None:
            nants = inp.getVarInt ('nants')
            dinfo = inp.probeVar ('delay0')

            if dinfo is None:
                delays = False
            elif dinfo[1] == 2 * nants:
                # An ATA extension: one fixed delay per antpol. Reshape
                # to be a bit more sensible
                delays = inp.getVarFloat ('delay0', 2 * nants)
                delays = delays.reshape ((2, nants)).T
            else:
                delays = inp.getVarFloat ('delay0', nants)
                delays = np.vstack ((delays, delays)).T

        times.add (t)
        pbps.add (pbp)

    # Get the timestamps onto a nice even grid, checking that our
    # gridding is decent.

    datatimes = np.asarray (sorted (times), dtype=np.double)
    nt = datatimes.size
    time0 = datatimes[0]
    cadence = np.median (datatimes[1:] - datatimes[:-1])
    tidxs = (datatimes - time0) / cadence
    timemap = np.empty (nt, dtype=np.int)
    nslot = int (round (tidxs[-1])) + 1
    scale = nslot * 1. / nt
    noff = 0

    for i in range (nt):
        timemap[i] = int (round (tidxs[i]))
        if abs (tidxs[i] - timemap[i]) > 0.01:
            noff += 1

    if noff > 0:
        warn ('had %d timestamps (out of %d) with poor '
              'mapping onto the grid', noff, nt)

    if scale > 1.05:
        warn ('data size increasing by factor of %.2f '
              'to get everything onto the time grid', scale)

    times = np.arange (nslot) * cadence + time0
    nt = nslot

    # Compute a few other things

    pbps = np.asarray (sorted (pbps), dtype=np.int32)
    nbp = pbps.size

    # Without the int(), nchan is a numpy.int32, the type of which
    # propagates along to various byte counts and offsets which end up
    # overflowing for sufficiently large datasets and causing
    # exceptions on negative values getting passed to the various
    # system calls used below.
    nchan = int (window.nchan)
    sdf = window.width / nchan
    sfreq = window.cfreq - 0.5 * (window.width - sdf)

    corr_bytes = 8 * nchan
    uvww_bytes = 4 * 8
    flag_bytes = nchan
    slice_bytes = (corr_bytes + flag_bytes + uvww_bytes) * nt
    dump_bytes = (corr_bytes + flag_bytes + uvww_bytes) * nbp

    nsimult = CACHE_SIZE // dump_bytes

    # Write out header info
    # Write-then-seek seems to break if buffering is used???

    data_offset = ((header.size + 7) // 8) * 8
    data_size = slice_bytes * nbp

    vars_offset = ((data_offset + data_size + 7) // 8) * 8

    f = open (tpath, 'wb+', 0)
    f.truncate (vars_offset) # hint how big the file will be
    f.write (header.pack (BYTE_ORDER_MARKER,
                          FORMAT_VERSION,
                          nbp, nt, nchan,
                          sfreq, sdf,
                          time0, cadence,
                          data_offset,
                          vars_offset))

    # Pass 2: write data. Below we cast the tidx variables to ints for
    # the same reason as with nchan above.

    def corr_offset (bpidx, tidx):
        return data_offset + bpidx * slice_bytes + corr_bytes * int (tidx)
    def flag_offset (bpidx, tidx):
        return (data_offset + bpidx * slice_bytes + corr_bytes * nt +
                flag_bytes * int (tidx))
    def uvww_offset (bpidx, tidx):
        return (data_offset + bpidx * slice_bytes + (corr_bytes + flag_bytes) * nt +
                uvww_bytes * int (tidx))

    lsts = np.empty (nt, dtype=np.double)

    corrs = np.empty ((nsimult, nbp, nchan), dtype=np.complex64)
    flags = np.empty ((nsimult, nbp, nchan), dtype=np.int8)
    uvwws = np.empty ((nsimult, nbp, 4), dtype=np.double)
    seen = np.empty ((nsimult, nbp), dtype=np.bool)
    lstbuf = np.empty (nsimult, dtype=np.double)

    empty_corr = np.zeros (nchan, dtype=np.complex64)
    empty_flags = np.zeros (nchan, dtype=np.int8)
    empty_uvww = np.zeros (4, dtype=np.double)

    # Progress reporting:
    unbufout = os.fdopen (os.dup (1), 'wb', 0)
    currec = 0
    tstart = time.time ()
    tlastprint = 0

    def dump (curtimes):
        nbatch = len (curtimes)

        tidxs = np.empty (nbatch, dtype=np.int)
        for time, sidx in iteritems(curtimes):
            tidxs[sidx] = timemap[datatimes.searchsorted (time)]
            lsts[tidxs[sidx]] = lstbuf[sidx]

        info = np.empty ((nbatch, 3), dtype=np.int)
        info[:,0] = tidxs.argsort ()
        info[:,1] = tidxs[info[:,0]]
        info[0,2] = 1

        for i in range (1, nbatch):
            info[i,2] = (info[i,1] != info[i-1,1] + 1)

        for bpidx in range (nbp):
            for sidx, tidx, seek in info:
                if seek:
                    f.seek (corr_offset (bpidx, tidx))
                if seen[sidx,bpidx]:
                    f.write (corrs[sidx,bpidx])
                else:
                    f.write (empty_corr)

            for sidx, tidx, seek in info:
                if seek:
                    f.seek (flag_offset (bpidx, tidx))
                if seen[sidx,bpidx]:
                    f.write (flags[sidx,bpidx])
                else:
                    f.write (empty_flags)

            for sidx, tidx, seek in info:
                if seek:
                    f.seek (uvww_offset (bpidx, tidx))
                if seen[sidx,bpidx]:
                    f.write (uvwws[sidx,bpidx])
                else:
                    f.write (empty_uvww)

    newchunk = True
    curtimes = {}
    nrec = nvis = 0
    seenany = np.zeros (nbp, dtype=np.bool)
    meanuvw = np.zeros ((nbp, 3), dtype=np.double)
    muvwcounts = np.zeros (nbp, dtype=np.int)
    visgen = vis.readLowlevel ('3', False)

    print ('pass 2 ...')

    for inp, pream, data, recflags in visgen:
        uvw = pream[:3]
        t = pream[3]
        pbp = mir2pbp32 (inp, pream)
        var = inp.getVariance ()
        if var == 0:
            var = 1.
        weight = 1. / var

        if currec % 500 == 0 and currec:
            now = time.time ()

            if now - tlastprint > 1:
                pct = 100. * currec / nrecs
                elapsed = now - tstart
                total = 1. * elapsed * nrecs / currec
                eta = total - elapsed

                msg = '   %3.1f%% (%d/%d) elapsed %s ETA %s total %s' % \
                    (pct, currec, nrecs, _sfmt (elapsed), _sfmt (eta), _sfmt (total))
                unbufout.write(msg.ljust (60).encode('utf8') + b'\r')
                tlastprint = now

        currec += 1

        if t not in curtimes and len (curtimes) == nsimult:
            pct = 100. * currec / nrecs
            msg = '   %3.1f%% (%d/%d) writing ...' % (pct, currec, nrecs)
            unbufout.write(msg.ljust (60).encode('utf8') + b'\r\n')
            dump (curtimes)
            newchunk = True

        if newchunk:
            curtimes = {}
            newchunk = False

        sidx = curtimes.get (t)

        if sidx is None:
            sidx = len (curtimes)
            curtimes[t] = sidx
            seen[sidx].fill (False)

        bpidx = pbps.searchsorted (pbp)

        seen[sidx,bpidx] = True
        uvwws[sidx,bpidx,:3] = uvw
        uvwws[sidx,bpidx,3] = weight
        corrs[sidx,bpidx] = data
        flags[sidx,bpidx] = recflags.astype (np.int8)
        lstbuf[sidx] = inp.getVarDouble ('lst')
        muvwcounts[bpidx] += 1
        meanuvw[bpidx] += uvw

        if recflags.any ():
            seenany[bpidx] = 1

        nrec += 1
        nvis += data.size

    if len (curtimes):
        msg = '   100%% (%d/%d) writing ...' % (currec, nrecs)
        unbufout.write(msg.ljust (60).encode('utf8') + b'\r')
        dump (curtimes)

    tfinish = time.time ()
    elapsed = tfinish - tstart
    print ('   100%% (%d/%d) elapsed %s ETA 0s total %s   ' % \
           (currec, nrecs, _sfmt (elapsed), _sfmt (elapsed)))
    unbufout.close ()

    # Finally, write out variables

    f.seek (vars_offset)
    savevariable (f, 'vispath', np.fromstring (str (vis), dtype=np.byte))
    savevariable (f, 'basepols', pbps)
    if delays is not False:
        savevariable (f, 'delays', delays)
    flaggedbps = pbps[np.where (seenany == 0)]
    savevariable (f, 'flaggedbps', flaggedbps)
    savevariable (f, 'lsts', lsts)

    wbad = np.where (muvwcounts == 0)
    muvwcounts[wbad] = 1
    meanuvw[:,0] /= muvwcounts # apparently broadcasting doesn't
    meanuvw[:,1] /= muvwcounts # do what you'd want here. Not sure
    meanuvw[:,2] /= muvwcounts # why, but it's only two extra lines.
    meanuvw[wbad] = 0
    # Take the mean across the spectral window, as well as in time:
    meanuvw *= window.cfreq / sfreq
    savevariable (f, 'meanuvws', meanuvw)

    f.close ()
    return nrec, nvis, data_size
Example #11
def main ():
    info = Holder ()
    info.jobname = get_one_line ('jobname.txt')
    info.jobid = get_one_line ('jobid')

    if info.jobid is None:
        info.jobid_fetch_failed = 1
        info.jobid = '?'
    else:
        line = get_sacct_first (info.jobid, 'ExitCode,MaxRSS,Elapsed,State')
        if line is None:
            info.sacct_fetch_failed = 1
            # Could theoretically fill in some of these from our various log
            # files but I can't imagine a situation where sacct will actually
            # fail on us.
            info.exitinfo = '?'
            info.mastermaxrss = '?'
            info.elapsed = '?'
            info.state = '?'
            info.success = -1
        else:
            info.exitinfo, info.mastermaxrss, info.elapsed, info.state = line.split ('|')
            info.success = 1 if info.exitinfo == '0:0' else 0

    info.workermaxrss = get_max_worker_maxrss ()

    tsubmit = get_one_line ('submit.wallclock')
    tstart = get_one_line ('start.wallclock')
    if tsubmit is not None and tstart is not None:
        info.startdelay = int (tstart) - int (tsubmit)

    try:
        info.ntasks = -1
        info.tot_nsuccess = -1
        info.tot_nfail = -1
        info.nleft = -1
        info.nattempts = -1
        info.cur_nsuccess = -1
        info.cur_nfail = -1
        natt = 0
        nsucc = 0
        nfail = 0

        info.ntasks = count_lines ('../tasks')
        info.tot_nsuccess = count_lines ('../success')
        info.tot_nfail = count_lines ('../failure')
        info.nleft = info.ntasks - info.tot_nsuccess - info.tot_nfail

        with io.open ('attempts.log', 'rt') as f:
            for line in f:
                pieces = line.strip ().split ()
                if pieces[1] == 'issued':
                    natt += 1
                elif pieces[1] == 'complete':
                    if pieces[-1] == '0':
                        nsucc += 1
                    else:
                        nfail += 1

        info.nattempts = natt
        info.cur_nsuccess = nsucc
        info.cur_nfail = nfail
    except Exception as e:
        warn ('couldn\'t summarize attempts: %s', e)

    with io.open ('postmortem.log', 'wt') as f:
        d = info.__dict__
        for k in sorted (d.keys ()):
            val = d[k]
            if val is not None:
                print ('%s=%s' % (k, val), file=f)

    try:
        with io.open (os.path.expanduser ('~/.robotinfo'), 'rt') as f:
            user = f.readline ().strip ()
            consumer_key = f.readline ().strip ()
            consumer_secret = f.readline ().strip ()
            access_token = f.readline ().strip ()
            access_secret = f.readline ().strip ()

            auth = tweepy.OAuthHandler (consumer_key, consumer_secret)
            auth.set_access_token (access_token, access_secret)
            api = tweepy.API (auth)

            t = ('@' + user + ' %(jobname)s %(state)s succ=%(success)d nt=%(ntasks)d '
                 'ns=%(cur_nsuccess)d nf=%(cur_nfail)d nleft=%(nleft)d' % info.__dict__)
            api.update_status (status=t)
    except Exception as e:
        warn ('couldn\'t tweet: %s', e)