Example #1
0
def test_progress_function(loop, capsys):
    with cluster() as (c, [a, b]):
        with Executor(('127.0.0.1', c['port']), loop=loop) as e:
            f = e.submit(lambda: 1)
            g = e.submit(lambda: 2)

            progress([[f], [[g]]], notebook=False)
            check_bar_completed(capsys)
Example #2
0
def test_fast(loop):
    with cluster() as (c, [a, b]):
        with Executor(('127.0.0.1', c['port']), loop=loop) as e:
            L = e.map(inc, range(100))
            L2 = e.map(dec, L)
            L3 = e.map(add, L, L2)
            p = progress(L3, multi=True, complete=True, notebook=True)
            assert set(p.all_keys) == {'inc', 'dec', 'add'}
Example #3
0
    from conf_analysis.meg import artifacts, preprocessing
    from conf_analysis.behavior import empirical, metadata, keymap
    import mne, locale
    import numpy as np
    import pickle
    locale.setlocale(locale.LC_ALL, "en_US")
    result = {}
    raw = mne.io.read_raw_ctf(filename, system_clock='ignore')
    trials = preprocessing.blocks(raw, full_file_cache=True)
    trl, bl = trials['trial'], trials['block']
    bcnt = 0
    for b in np.unique(bl):
        if len(trl[bl == b]) >= 75:
            result[b] = bcnt
            bcnt += 1
        print((b, bcnt))
    return result


block_map = {}
for snum in range(1, 16):
    filenames = [metadata.get_raw_filename(snum, b) for b in range(4)]
    block_map[snum] = {}
    for session, filename in enumerate(filenames):
        block_map[snum][session] = executor.submit(do_one, filename)

diagnostics.progress(block_map)
block_map = executor.gather(block_map)

pickle.dump(block_map, open('blockmap.pickle', 'w'))
def main(args):
    """
    Initialising launch sequence.
    """
    # ------------------------------------------------------
    # Print some stuff to show that the code is running:
    print("")
    os.system(
        "printf 'A demonstration of a \033[5mDPrepB/DPrepC\033[m SDP pipeline\n'"
    )
    print("")
    # Set the directory for the moment images:
    MOMENTS_DIR = args.outputs + '/MOMENTS'
    # Check that the output directories exist, if not then create:
    os.makedirs(args.outputs, exist_ok=True)
    os.makedirs(MOMENTS_DIR, exist_ok=True)
    # Set the polarisation definition of the instrument:
    POLDEF = init_inst(args.inst)

    # Setup Variables for SIP services
    # ------------------------------------------------------
    # Define the Queue Producer settings:
    if args.queues:
        queue_settings = {
            'bootstrap.servers': 'scheduler:9092',
            'message.max.bytes': 100000000
        }  #10.60.253.31:9092

    # Setup the Confluent Kafka Queue
    # ------------------------------------------------------
    if args.queues:
        from confluent_kafka import Producer
        import pickle
        # Create an SDP queue:
        sip_queue = Producer(queue_settings)

    # Define a Data Array Format
    # ------------------------------------------------------
    def gen_data(channel):
        return np.array([
            vis1[channel], vis2[channel], channel, None, None, False, False,
            args.plots,
            float(args.uvcut),
            float(args.pixels), POLDEF, args.outputs,
            float(args.angres), None, None, None, None, None, None, args.twod,
            npixel_advice, cell_advice
        ])

    # Setup the Dask Cluster
    # ------------------------------------------------------
    starttime = t.time()

    dask.config.set(get=dask.distributed.Client.get)
    client = Client(
        args.daskaddress)  # scheduler for Docker container, localhost for P3.

    print("Dask Client details:")
    print(client)
    print("")

    # Define channel range for 1 subband, each containing 40 channels:
    channel_range = np.array(range(int(args.channels)))

    # Load the data into memory:
    """
    The input data should be interfaced with Buffer Management.
    """
    print("Loading data:")
    print("")
    vis1 = [
        load('%s/%s' % (args.inputs, args.ms1), range(channel, channel + 1),
             POLDEF) for channel in range(0, int(args.channels))
    ]
    vis2 = [
        load('%s/%s' % (args.inputs, args.ms2), range(channel, channel + 1),
             POLDEF) for channel in range(0, int(args.channels))
    ]

    # Prepare Measurement Set
    # ------------------------------------------------------
    # Combine MSSS snapshots:
    vis_advice = append_visibility(vis1[0], vis2[0])

    # Apply a uv-distance cut to the data:
    vis_advice = uv_cut(vis_advice, float(args.uvcut))
    npixel_advice, cell_advice = uv_advice(vis_advice, float(args.uvcut),
                                           float(args.pixels))

    # Begin imaging via the Dask cluster
    # ------------------------------------------------------
    # Submit data for each channel to the client, and return an image:

    # Scatter all the data in advance to all the workers:
    """
    The data here could be passed via Data Queues.
    Queues may not be ideal. Data throughput challenges.
    Need to think more about the optimum approach.
    """
    print("Scatter data to workers:")
    print("")
    big_job = [client.scatter(gen_data(channel)) for channel in channel_range]

    # Submit jobs to the cluster and create a list of futures:
    futures = [
        client.submit(dprepb_imaging, big_job[channel], pure=False, retries=3)
        for channel in channel_range
    ]
    """
    The dprepb_imaging function could generate QA, logging, and pass this information via Data Queues.
    Queues work well for this.
    Python logging calls are preferable. Send them to a text file on the node.
    Run another service that watches that file. Or just read from standard out.
    The Dockerisation will assist with logs.
    """

    print("Imaging on workers:")
    # Watch progress:
    progress(futures)

    # Wait until all futures are complete:
    wait(futures)

    # Check that no futures have errors, if so resubmit:
    for future in futures:
        if future.status == 'error':
            print("ERROR: Future", future, "has 'error' status, as:")
            print(client.recreate_error_locally(future))
            print("Rerunning...")
            print("")
            index = futures.index(future)
            futures[index].cancel()
            futures[index] = client.submit(dprepb_imaging,
                                           big_job[index],
                                           pure=False,
                                           retries=3)

    # Wait until all futures are complete:
    wait(futures)

    # Gather results from the futures:
    results = client.gather(futures, errors='raise')

    # Run QA on ARL objects and produce to queue:
    if args.queues:
        print("Adding QA to queue:")
        for result in results:
            sip_queue.produce('qa', pickle.dumps(qa_image(result), protocol=2))

        sip_queue.flush()

    # Return the data element of each ARL object, as a Dask future:
    futures = [
        client.submit(arl_data_future, result, pure=False, retries=3)
        for result in results
    ]

    progress(futures)

    wait(futures)

    # Calculate the Moment images
    # ------------------------------------------------------
    # Now use 'distributed Dask arrays' in order to parallelise the Moment image calculation:
    # Construct a small Dask array for every future:
    print("")
    print("Calculating Moment images:")
    print("")
    arrays = [
        da.from_delayed(future,
                        dtype=np.dtype('float64'),
                        shape=(1, 4, 512, 512)) for future in futures
    ]

    # Stack all small Dask arrays into one:
    stack = da.stack(arrays, axis=0)

    # Combine chunks to reduce overhead - is initially (40, 1, 4, 512, 512):
    stack = stack.rechunk((1, 1, 4, 64, 64))

    # Spread the data around on the cluster:
    stack = client.persist(stack)
    # Data is now coordinated by the single logical Dask array, 'stack'.

    # Save the Moment images:
    """
    The output moment images should be interfaced with Buffer Management.
    
    Need to know more about the Buffer specification.
    Related to initial data distribution also/staging.
    """
    print("Saving Moment images to disk:")
    print("")
    # First generate a template:
    image_template = import_image_from_fits('%s/imaging_dirty_WStack-%s.fits' %
                                            (args.outputs, 0))

    # Output mean images:
    # I:
    image_template.data = stack[:, :, 0, :, :].mean(axis=0).compute()
    # Run QA on ARL objects and produce to queue:
    if args.queues:
        sip_queue.produce('qa',
                          pickle.dumps(qa_image(image_template), protocol=2))
    # Export the data to disk:
    export_image_to_fits(image_template,
                         '%s/Mean-%s.fits' % (MOMENTS_DIR, 'I'))

    # Q:
    image_template.data = stack[:, :, 1, :, :].mean(axis=0).compute()
    # Run QA on ARL objects and produce to queue:
    if args.queues:
        sip_queue.produce('qa',
                          pickle.dumps(qa_image(image_template), protocol=2))
    # Export the data to disk:
    export_image_to_fits(image_template,
                         '%s/Mean-%s.fits' % (MOMENTS_DIR, 'Q'))

    # U:
    image_template.data = stack[:, :, 2, :, :].mean(axis=0).compute()
    # Run QA on ARL objects and produce to queue:
    if args.queues:
        sip_queue.produce('qa',
                          pickle.dumps(qa_image(image_template), protocol=2))
    # Export the data to disk:
    export_image_to_fits(image_template,
                         '%s/Mean-%s.fits' % (MOMENTS_DIR, 'U'))

    # P:
    image_template.data = da.sqrt(
        (da.square(stack[:, :, 1, :, :]) +
         da.square(stack[:, :, 2, :, :]))).mean(axis=0).compute()
    # Run QA on ARL objects and produce to queue:
    if args.queues:
        sip_queue.produce('qa',
                          pickle.dumps(qa_image(image_template), protocol=2))
    # Export the data to disk:
    export_image_to_fits(image_template,
                         '%s/Mean-%s.fits' % (MOMENTS_DIR, 'P'))

    # Output standard deviation images:
    # I:
    image_template.data = stack[:, :, 0, :, :].std(axis=0).compute()
    # Run QA on ARL objects and produce to queue:
    if args.queues:
        sip_queue.produce('qa',
                          pickle.dumps(qa_image(image_template), protocol=2))
    # Export the data to disk:
    export_image_to_fits(image_template, '%s/Std-%s.fits' % (MOMENTS_DIR, 'I'))

    # Q:
    image_template.data = stack[:, :, 1, :, :].std(axis=0).compute()
    # Run QA on ARL objects and produce to queue:
    if args.queues:
        sip_queue.produce('qa',
                          pickle.dumps(qa_image(image_template), protocol=2))
    # Export the data to disk:
    export_image_to_fits(image_template, '%s/Std-%s.fits' % (MOMENTS_DIR, 'Q'))

    # U:
    image_template.data = stack[:, :, 2, :, :].std(axis=0).compute()
    # Run QA on ARL objects and produce to queue:
    if args.queues:
        sip_queue.produce('qa',
                          pickle.dumps(qa_image(image_template), protocol=2))
    # Export the data to disk:
    export_image_to_fits(image_template, '%s/Std-%s.fits' % (MOMENTS_DIR, 'U'))

    # P:
    image_template.data = da.sqrt(
        (da.square(stack[:, :, 1, :, :]) +
         da.square(stack[:, :, 2, :, :]))).std(axis=0).compute()
    # Run QA on ARL objects and produce to queue:
    if args.queues:
        sip_queue.produce('qa',
                          pickle.dumps(qa_image(image_template), protocol=2))
    # Export the data to disk:
    export_image_to_fits(image_template, '%s/Std-%s.fits' % (MOMENTS_DIR, 'P'))

    # Flush queue:
    if args.queues:
        sip_queue.flush()

    # Make a tarball of moment images:
    subprocess.call([
        'tar', '-cvf',
        '%s/moment.tar' % (MOMENTS_DIR),
        '%s/' % (MOMENTS_DIR)
    ])
    subprocess.call(['gzip', '-9f', '%s/moment.tar' % (MOMENTS_DIR)])

    endtime = t.time()
    print(endtime - starttime)