Example #1
    def test_create_vis_iter_with_model(self):
        model = create_test_image(canonical=True,
                                  cellsize=0.001,
                                  frequency=self.frequency,
                                  phasecentre=self.phasecentre)
        comp = Skycomponent(direction=self.phasecentre,
                            frequency=self.frequency,
                            flux=self.flux,
                            polarisation_frame=PolarisationFrame('stokesI'))
        vis_iter = create_blockvisibility_iterator(
            self.config,
            self.times,
            self.frequency,
            channel_bandwidth=self.channel_bandwidth,
            phasecentre=self.phasecentre,
            weight=1.0,
            polarisation_frame=PolarisationFrame('stokesI'),
            integration_time=30.0,
            number_integrations=3,
            model=model,
            components=comp)

        fullvis = None
        totalnvis = 0
        for i, bvis in enumerate(vis_iter):
            assert bvis.phasecentre == self.phasecentre
            assert bvis.nvis
            if i == 0:
                fullvis = bvis
                totalnvis = bvis.nvis
            else:
                fullvis = append_visibility(fullvis, bvis)
                totalnvis += bvis.nvis

        assert fullvis.nvis == totalnvis
Example #2
    def test_append_visibility(self):
        self.vis = create_visibility(self.lowcore, self.times, self.frequency,
                                     channel_bandwidth=self.channel_bandwidth,
                                     phasecentre=self.phasecentre,
                                     weight=1.0)
        othertimes = (numpy.pi / 43200.0) * numpy.arange(300.0, 600.0, 30.0)
        self.othervis = create_visibility(self.lowcore, othertimes, self.frequency,
                                          channel_bandwidth=self.channel_bandwidth,
                                          phasecentre=self.phasecentre,
                                          weight=1.0)
        self.vis = append_visibility(self.vis, self.othervis)
        assert self.vis.nvis == len(self.vis.time)
        assert self.vis.nvis == len(self.vis.frequency)
Example #3
    def test_create_vis_iter(self):
        vis_iter = create_blockvisibility_iterator(self.config, self.times, self.frequency,
                                                   channel_bandwidth=self.channel_bandwidth,
                                                   phasecentre=self.phasecentre,
                                                   weight=1.0, polarisation_frame=PolarisationFrame('stokesI'),
                                                   integration_time=30.0, number_integrations=3)

        fullvis = None
        totalnvis = 0
        for i, vis in enumerate(vis_iter):
            assert vis.nvis
            if i == 0:
                fullvis = vis
                totalnvis = vis.nvis
            else:
                fullvis = append_visibility(fullvis, vis)
                totalnvis += vis.nvis

        assert fullvis.nvis == totalnvis
Example #4
def dprepb_imaging(vis_input):
    """The DPrepB/C imaging pipeline for visibility data.
        
    Args:
    vis_input (array): array of ARL visibility data and parameters.
    
    Returns:
    restored: clean image.
    """
    # Load the Input Data
    # ------------------------------------------------------
    vis1 = vis_input[0]
    vis2 = vis_input[1]
    channel = vis_input[2]
    stations = vis_input[3]
    lofar_stat_pos = vis_input[4]
    APPLY_IONO = vis_input[5]
    APPLY_BEAM = vis_input[6]
    MAKE_PLOTS = vis_input[7]
    UV_CUTOFF = vis_input[8]
    PIXELS_PER_BEAM = vis_input[9]
    POLDEF = vis_input[10]
    RESULTS_DIR = vis_input[11]
    FORCE_RESOLUTION = vis_input[12]
    ionRM1 = vis_input[13]
    times1 = vis_input[14]
    time_indices1 = vis_input[15]
    ionRM2 = vis_input[16]
    times2 = vis_input[17]
    time_indices2 = vis_input[18]
    twod_imaging = vis_input[19]
    npixel_advice = vis_input[20]
    cell_advice = vis_input[21]

    # Make a results directory on the worker:
    os.makedirs(RESULTS_DIR, exist_ok=True)

    # Redirect stdout to a log file, since print output on Dask workers
    # does not reach the client
    # ------------------------------------------------------
    sys.stdout = open('%s/dask-log.txt' % (RESULTS_DIR), 'w')

    # Prepare Measurement Set
    # ------------------------------------------------------
    # Combine MSSS snapshots:
    vis = append_visibility(vis1, vis2)

    # Apply a uv-distance cut to the data:
    vis = uv_cut(vis, UV_CUTOFF)

    # Make some basic plots:
    if MAKE_PLOTS:
        uv_cov(vis)
        uv_dist(vis)

    # Imaging and Deconvolution
    # ------------------------------------------------------
    # Convert from XX/XY/YX/YY to I/Q/U/V:
    vis = convert_to_stokes(vis, POLDEF)

    # Image I, Q, U, V, per channel:
    if twod_imaging:
        dirty, psf = image_2d(vis, npixel_advice, cell_advice, channel,
                              RESULTS_DIR)
    else:
        dirty, psf = wstack(vis, npixel_advice, cell_advice, channel,
                            RESULTS_DIR)

    # Deconvolve (using complex Hogbom clean):
    comp, residual = deconvolve_cube_complex(dirty,
                                             psf,
                                             niter=100,
                                             threshold=0.001,
                                             fracthresh=0.001,
                                             window_shape='',
                                             gain=0.1,
                                             algorithm='hogbom-complex')

    # Convert resolution (FWHM in arcmin) to a psfwidth (standard deviation in pixels):
    clean_res = (((FORCE_RESOLUTION / 2.35482004503) / 60.0) * np.pi /
                 180.0) / cell_advice
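    # (2.35482004503 is 2*sqrt(2*ln(2)), the Gaussian FWHM-to-sigma factor;
    # cell_advice is assumed to be in radians per pixel.)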

    # Create the restored image:
    restored = restore_cube(comp, psf, residual, psfwidth=clean_res)

    # Save to disk:
    export_image_to_fits(
        restored, '%s/imaging_clean_WStack-%s.fits' % (RESULTS_DIR, channel))

    return restored
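
For reference, dprepb_imaging expects its 22 inputs packed positionally in
the order unpacked above. A minimal sketch of assembling that array by hand,
mirroring the gen_data helper in main() below; all concrete values here are
hypothetical placeholders:

import numpy as np

vis1_ch0 = vis2_ch0 = None  # in practice: ARL visibilities loaded from two Measurement Sets

vis_input = np.array([
    vis1_ch0, vis2_ch0,    # two MSSS snapshot visibilities for one channel
    0,                     # channel index
    None, None,            # stations, lofar_stat_pos (unused by this pipeline)
    False, False, False,   # APPLY_IONO, APPLY_BEAM, MAKE_PLOTS
    450.0,                 # UV_CUTOFF (hypothetical value)
    5.0,                   # PIXELS_PER_BEAM (hypothetical value)
    'circular',            # POLDEF (hypothetical value)
    './results',           # RESULTS_DIR
    8.0,                   # FORCE_RESOLUTION, FWHM in arcmin (hypothetical value)
    None, None, None,      # ionRM1, times1, time_indices1 (unused)
    None, None, None,      # ionRM2, times2, time_indices2 (unused)
    False,                 # twod_imaging: False selects the w-stacking imager
    512, 0.0001,           # npixel_advice, cell_advice (hypothetical values)
])

# restored = dprepb_imaging(vis_input)
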
def main(args):
    """
    Initialising launch sequence.
    """
    # ------------------------------------------------------
    # Print some stuff to show that the code is running:
    print("")
    os.system(
        "printf 'A demonstration of a \033[5mDPrepB/DPrepC\033[m SDP pipeline\n'"
    )
    print("")
    # Set the directory for the moment images:
    MOMENTS_DIR = args.outputs + '/MOMENTS'
    # Check that the output directories exist, if not then create:
    os.makedirs(args.outputs, exist_ok=True)
    os.makedirs(MOMENTS_DIR, exist_ok=True)
    # Set the polarisation definition of the instrument:
    POLDEF = init_inst(args.inst)

    # Setup Variables for SIP services
    # ------------------------------------------------------
    # Define the Queue Producer settings:
    if args.queues:
        queue_settings = {
            'bootstrap.servers': 'scheduler:9092',
            'message.max.bytes': 100000000
        }  # alternative bootstrap server: 10.60.253.31:9092

    # Setup the Confluent Kafka Queue
    # ------------------------------------------------------
    if args.queues:
        from confluent_kafka import Producer
        import pickle
        # Create an SDP queue:
        sip_queue = Producer(queue_settings)

    # Define a Data Array Format
    # ------------------------------------------------------
    def gen_data(channel):
        return np.array([
            vis1[channel], vis2[channel], channel, None, None, False, False,
            args.plots,
            float(args.uvcut),
            float(args.pixels), POLDEF, args.outputs,
            float(args.angres), None, None, None, None, None, None, args.twod,
            npixel_advice, cell_advice
        ])
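
    # Note: gen_data closes over vis1, vis2, npixel_advice and cell_advice,
    # which are only defined further down in main(); the function is not
    # called until after they exist.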

    # Setup the Dask Cluster
    # ------------------------------------------------------
    starttime = t.time()

    dask.config.set(get=dask.distributed.Client.get)
    client = Client(
        args.daskaddress)  # scheduler for Docker container, localhost for P3.

    print("Dask Client details:")
    print(client)
    print("")

    # Define the channel range for one subband (each subband contains 40 channels):
    channel_range = np.array(range(int(args.channels)))

    # Load the data into memory:
    """
    The input data should be interfaced with Buffer Management.
    """
    print("Loading data:")
    print("")
    vis1 = [
        load('%s/%s' % (args.inputs, args.ms1), range(channel, channel + 1),
             POLDEF) for channel in range(0, int(args.channels))
    ]
    vis2 = [
        load('%s/%s' % (args.inputs, args.ms2), range(channel, channel + 1),
             POLDEF) for channel in range(0, int(args.channels))
    ]

    # Prepare Measurement Set
    # ------------------------------------------------------
    # Combine MSSS snapshots:
    vis_advice = append_visibility(vis1[0], vis2[0])

    # Apply a uv-distance cut to the data:
    vis_advice = uv_cut(vis_advice, float(args.uvcut))
    npixel_advice, cell_advice = uv_advice(vis_advice, float(args.uvcut),
                                           float(args.pixels))
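    # The advice from the first channel is used for all channels, on the
    # assumption that every channel shares the same uv-coverage.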

    # Begin imaging via the Dask cluster
    # ------------------------------------------------------
    # Submit data for each channel to the client, and return an image:

    # Scatter all the data in advance to all the workers:
    """
    The data here could be passed via Data Queues, but queues may not be
    ideal given the data-throughput challenges; the optimum approach needs
    more thought.
    """
    print("Scatter data to workers:")
    print("")
    big_job = [client.scatter(gen_data(channel)) for channel in channel_range]

    # Submit jobs to the cluster and create a list of futures:
    futures = [
        client.submit(dprepb_imaging, big_job[channel], pure=False, retries=3)
        for channel in channel_range
    ]
    """
    The dprepb_imaging function could generate QA and logging, and pass this
    information via Data Queues, which work well for this. Python logging
    calls are preferable: send them to a text file on the node and have
    another service watch that file, or just read from standard out. The
    Dockerisation will assist with logs.
    """

    print("Imaging on workers:")
    # Watch progress:
    progress(futures)

    # Wait until all futures are complete:
    wait(futures)

    # Check whether any future has an error and, if so, resubmit it:
    for future in futures:
        if future.status == 'error':
            print("ERROR: Future", future, "has 'error' status, as:")
            print(client.recreate_error_locally(future))
            print("Rerunning...")
            print("")
            index = futures.index(future)
            futures[index].cancel()
            futures[index] = client.submit(dprepb_imaging,
                                           big_job[index],
                                           pure=False,
                                           retries=3)

    # Wait until all futures are complete:
    wait(futures)

    # Gather results from the futures:
    results = client.gather(futures, errors='raise')
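
    # 'results' is now a list of restored ARL images, one per channel, held
    # in local memory on the client.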

    # Run QA on ARL objects and produce to queue:
    if args.queues:
        print("Adding QA to queue:")
        for result in results:
            sip_queue.produce('qa', pickle.dumps(qa_image(result), protocol=2))

        sip_queue.flush()

    # Return the data element of each ARL object, as a Dask future:
    futures = [
        client.submit(arl_data_future, result, pure=False, retries=3)
        for result in results
    ]

    progress(futures)

    wait(futures)

    # Calculate the Moment images
    # ------------------------------------------------------
    # Now use 'distributed Dask arrays' in order to parallelise the Moment image calculation:
    # Construct a small Dask array for every future:
    print("")
    print("Calculating Moment images:")
    print("")
    arrays = [
        da.from_delayed(future,
                        dtype=np.dtype('float64'),
                        shape=(1, 4, 512, 512)) for future in futures
    ]

    # Stack all small Dask arrays into one:
    stack = da.stack(arrays, axis=0)

    # Rechunk into smaller blocks (the stack is initially (40, 1, 4, 512, 512)):
    stack = stack.rechunk((1, 1, 4, 64, 64))

    # Spread the data around on the cluster:
    stack = client.persist(stack)
    # Data is now coordinated by the single logical Dask array, 'stack'.

    # Save the Moment images:
    """
    The output moment images should be interfaced with Buffer Management.

    More detail on the Buffer specification is needed; this also relates to
    the initial data distribution and staging.
    """
    print("Saving Moment images to disk:")
    print("")
    # First generate a template:
    image_template = import_image_from_fits('%s/imaging_dirty_WStack-%s.fits' %
                                            (args.outputs, 0))

    # Output mean images:
    # I:
    image_template.data = stack[:, :, 0, :, :].mean(axis=0).compute()
    # Run QA on ARL objects and produce to queue:
    if args.queues:
        sip_queue.produce('qa',
                          pickle.dumps(qa_image(image_template), protocol=2))
    # Export the data to disk:
    export_image_to_fits(image_template,
                         '%s/Mean-%s.fits' % (MOMENTS_DIR, 'I'))

    # Q:
    image_template.data = stack[:, :, 1, :, :].mean(axis=0).compute()
    # Run QA on ARL objects and produce to queue:
    if args.queues:
        sip_queue.produce('qa',
                          pickle.dumps(qa_image(image_template), protocol=2))
    # Export the data to disk:
    export_image_to_fits(image_template,
                         '%s/Mean-%s.fits' % (MOMENTS_DIR, 'Q'))

    # U:
    image_template.data = stack[:, :, 2, :, :].mean(axis=0).compute()
    # Run QA on ARL objects and produce to queue:
    if args.queues:
        sip_queue.produce('qa',
                          pickle.dumps(qa_image(image_template), protocol=2))
    # Export the data to disk:
    export_image_to_fits(image_template,
                         '%s/Mean-%s.fits' % (MOMENTS_DIR, 'U'))

    # P:
    image_template.data = da.sqrt(
        (da.square(stack[:, :, 1, :, :]) +
         da.square(stack[:, :, 2, :, :]))).mean(axis=0).compute()
    # Run QA on ARL objects and produce to queue:
    if args.queues:
        sip_queue.produce('qa',
                          pickle.dumps(qa_image(image_template), protocol=2))
    # Export the data to disk:
    export_image_to_fits(image_template,
                         '%s/Mean-%s.fits' % (MOMENTS_DIR, 'P'))

    # Output standard deviation images:
    # I:
    image_template.data = stack[:, :, 0, :, :].std(axis=0).compute()
    # Run QA on ARL objects and produce to queue:
    if args.queues:
        sip_queue.produce('qa',
                          pickle.dumps(qa_image(image_template), protocol=2))
    # Export the data to disk:
    export_image_to_fits(image_template, '%s/Std-%s.fits' % (MOMENTS_DIR, 'I'))

    # Q:
    image_template.data = stack[:, :, 1, :, :].std(axis=0).compute()
    # Run QA on ARL objects and produce to queue:
    if args.queues:
        sip_queue.produce('qa',
                          pickle.dumps(qa_image(image_template), protocol=2))
    # Export the data to disk:
    export_image_to_fits(image_template, '%s/Std-%s.fits' % (MOMENTS_DIR, 'Q'))

    # U:
    image_template.data = stack[:, :, 2, :, :].std(axis=0).compute()
    # Run QA on ARL objects and produce to queue:
    if args.queues:
        sip_queue.produce('qa',
                          pickle.dumps(qa_image(image_template), protocol=2))
    # Export the data to disk:
    export_image_to_fits(image_template, '%s/Std-%s.fits' % (MOMENTS_DIR, 'U'))

    # P:
    image_template.data = da.sqrt(
        (da.square(stack[:, :, 1, :, :]) +
         da.square(stack[:, :, 2, :, :]))).std(axis=0).compute()
    # Run QA on ARL objects and produce to queue:
    if args.queues:
        sip_queue.produce('qa',
                          pickle.dumps(qa_image(image_template), protocol=2))
    # Export the data to disk:
    export_image_to_fits(image_template, '%s/Std-%s.fits' % (MOMENTS_DIR, 'P'))

    # Flush queue:
    if args.queues:
        sip_queue.flush()

    # Make a tarball of moment images:
    subprocess.call([
        'tar', '-cvf',
        '%s/moment.tar' % (MOMENTS_DIR),
        '%s/' % (MOMENTS_DIR)
    ])
    subprocess.call(['gzip', '-9f', '%s/moment.tar' % (MOMENTS_DIR)])

    endtime = t.time()
    print("Total run time: %g seconds" % (endtime - starttime))