def tearDown(self):
    """Pause briefly, then close the ``arlexecute`` client on a best-effort basis.

    Ordinary errors raised by ``arlexecute.close()`` are deliberately ignored
    so that a failed shutdown never turns a finished test into an error.
    ``KeyboardInterrupt`` and ``SystemExit`` are no longer swallowed, so the
    test run can still be interrupted during the pause or the shutdown.
    """
    import time

    # NOTE(review): the 5 s pause is kept from the original; presumably it
    # gives the scheduler/workers time to settle before shutdown -- confirm.
    time.sleep(5.0)
    try:
        arlexecute.close()
    except Exception:
        # Best-effort cleanup: a failure to close must not fail the test.
        pass
def tearDown(self):
    """Close the ``arlexecute`` client and stop the DALiuGE process if one is in use.

    When ``arlexecute.using_dlg`` is true, the process held in ``self.nm_proc``
    is terminated (or killed) with a timeout argument of 10. The parent class
    ``tearDown`` is then invoked.
    """
    # Imported locally, exactly as in the original, so the wrapper is only
    # loaded when the fixture is actually torn down.
    from wrappers.arlexecute.execution_support.arlexecute import arlexecute as executor

    executor.close()

    if executor.using_dlg:
        from dlg import utils as dlg_utils

        dlg_utils.terminate_or_kill(self.nm_proc, 10)

    super(ARLExecuteTestCase, self).tearDown()
def test_useFunction(self):
    """Check that ``arlexecute`` evaluates a wrapped function with dask disabled.

    A squaring function is wrapped with ``arlexecute.execute``, applied to
    ``numpy.arange(10)``, and the computed result is compared element-wise
    with the expected squares.

    The client is closed in a ``finally`` clause so that a failing assertion
    (or an error while computing) does not leave it open for later tests.
    """
    def square(x):
        return x ** 2

    arlexecute.set_client(use_dask=False)
    try:
        graph = arlexecute.execute(square)(numpy.arange(10))
        expected = numpy.array([0, 1, 4, 9, 16, 25, 36, 49, 64, 81])
        assert (arlexecute.compute(graph) == expected).all()
    finally:
        # Always release the client, even when the check above fails.
        arlexecute.close()
def test_useDaskSync(self):
    """Check that ``arlexecute`` evaluates a wrapped function synchronously via dask.

    A squaring function is wrapped with ``arlexecute.execute``, applied to
    ``numpy.arange(10)``, computed with ``sync=True``, and the result is
    compared element-wise with the expected squares.

    The client is closed in a ``finally`` clause so that a failing assertion
    (or an error while computing) does not leave the dask client running.
    """
    def square(x):
        return x ** 2

    arlexecute.set_client(use_dask=True)
    try:
        graph = arlexecute.execute(square)(numpy.arange(10))
        result = arlexecute.compute(graph, sync=True)
        expected = numpy.array([0, 1, 4, 9, 16, 25, 36, 49, 64, 81])
        assert (result == expected).all()
    finally:
        # Always release the client, even when the check above fails.
        arlexecute.close()
def trial_case(results, seed=180555, context='wstack', nworkers=8, threads_per_worker=1, memory=8,
               processes=True, order='frequency', nfreqwin=7, ntimes=3, rmax=750.0,
               facets=1, wprojection_planes=1, use_dask=True, use_serial_imaging=True,
               flux_limit=0.3, nmajor=5, dft_threshold=1.0, use_serial_clean=True,
               write_fits=False):
    """ Single trial for performance-timings

    Simulates visibilities from GLEAM including phase errors, makes a dirty image,
    runs a model prediction and then builds and executes the ICAL pipeline, timing
    each stage. Progress is written both to stdout and to the log file
    'pipelines_arlexecute_timings.log'.

    The following keys are written into the results dictionary by this function:

        'seed', 'context', 'hostname', 'git_hash', 'epoch': identification of the run
        'nworkers', 'threads_per_worker', 'processes', 'memory', 'order', 'nfreqwin',
        'ntimes', 'rmax', 'facets', 'wprojection_planes', 'dft threshold', 'use_dask':
            copies of the corresponding arguments
        'nnodes': set to 1, but only when use_dask is False
        'npixel', 'cellsize': taken from the advice returned by advise_wide_field
        'vis_slices': number of visibility slices chosen for the imaging context
        'size invert graph', 'size ICAL graph': get_size() of the respective graphs
        'time invert graph', time to construct the dirty image graph (s)
        'time invert', time to execute the dirty image graph (s)
        'time predict', time to construct and execute the predict graph (s)
        'time ICAL graph', time to construct the ICAL graph (s)
        'time ICAL', time to execute the ICAL graph (s)
        'dirty_max', 'dirty_min': QA of the dirty image for the centre channel
        'deconvolved_max', 'deconvolved_min',
        'residual_max', 'residual_min',
        'restored_max', 'restored_min': QA of the ICAL outputs for the centre channel
        'time overall', overall execution time (s)

    :param results: Initial state of the results dictionary; updated in place
    :param seed: Random number seed (numpy seed, also passed to the corruption workflow)
    :param context: Type of imaging context: '2d'|'timeslice'|'wprojection'|'wstack'.
        Any other value is replaced by 'wstack'.
    :param nworkers: Number of dask workers requested from get_dask_Client
    :param threads_per_worker: Number of threads per worker
    :param memory: Per-worker memory limit; multiplied by 1024**3 before being passed on
    :param processes: Only recorded in results. NOTE(review): the value handed to
        get_dask_Client is `threads_per_worker == 1`, not this argument - confirm intent.
    :param order: Passed to simulate_list_arlexecute_workflow
    :param nfreqwin: Number of frequency windows (points between 1.0e8 and 1.2e8)
    :param ntimes: Number of times (points between -pi/4 and +pi/4)
    :param rmax: Passed to simulate_list_arlexecute_workflow; also scales the taper
    :param facets: Number of facets, passed to the advice, predict and invert calls
    :param wprojection_planes: Number of wprojection planes passed to advise_wide_field
    :param use_dask: Use dask or immediate evaluation
    :param use_serial_imaging: Passed as use_serial_invert / use_serial_predict
    :param flux_limit: Passed to create_low_test_skymodel_from_gleam
    :param nmajor: Number of major cycles passed to the ICAL workflow
    :param dft_threshold: Passed as flux_threshold to create_low_test_skymodel_from_gleam
    :param use_serial_clean: Passed to the ICAL workflow
    :param write_fits: If True, export the dirty, deconvolved, residual and restored images
    :return: The results after being passed through sort_dict
    """
    if use_dask:
        client = get_dask_Client(threads_per_worker=threads_per_worker,
                                 processes=threads_per_worker == 1,
                                 memory_limit=memory * 1024 * 1024 * 1024,
                                 n_workers=nworkers)
        arlexecute.set_client(client)
        nodes = findNodes(arlexecute.client)
        # NOTE(review): results['nnodes'] is not set on this branch.
        print("Defined %d workers on %d nodes" % (nworkers, len(nodes)))
        print("Workers are: %s" % str(nodes))
    else:
        arlexecute.set_client(use_dask=use_dask)
        results['nnodes'] = 1

    def init_logging():
        # filemode='w' truncates the log file each time logging is configured.
        logging.basicConfig(
            filename='pipelines_arlexecute_timings.log',
            filemode='w',
            format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
            datefmt='%H:%M:%S',
            level=logging.INFO)

    init_logging()
    log = logging.getLogger()

    # Initialise logging on the workers. This appears to only work using the process scheduler.
    arlexecute.run(init_logging)

    def lprint(*args):
        # Send the same message to the log and to stdout.
        log.info(*args)
        print(*args)

    lprint("Starting pipelines_arlexecute_timings")

    numpy.random.seed(seed)
    results['seed'] = seed

    start_all = time.time()

    results['context'] = context
    results['hostname'] = socket.gethostname()
    results['git_hash'] = git_hash()
    results['epoch'] = time.strftime("%Y-%m-%d %H:%M:%S")

    lprint("Context is %s" % context)

    results['nworkers'] = nworkers
    results['threads_per_worker'] = threads_per_worker
    results['processes'] = processes
    results['memory'] = memory
    results['order'] = order
    results['nfreqwin'] = nfreqwin
    results['ntimes'] = ntimes
    results['rmax'] = rmax
    results['facets'] = facets
    results['wprojection_planes'] = wprojection_planes
    results['dft threshold'] = dft_threshold
    results['use_dask'] = use_dask

    lprint("At start, configuration is:")
    lprint(sort_dict(results))

    # Parameters determining scale of simulation.
    frequency = numpy.linspace(1.0e8, 1.2e8, nfreqwin)
    # Index of the middle frequency window; used to pick the image that is QA'ed.
    centre = nfreqwin // 2
    if nfreqwin > 1:
        channel_bandwidth = numpy.array(nfreqwin * [frequency[1] - frequency[0]])
    else:
        channel_bandwidth = numpy.array([1e6])

    times = numpy.linspace(-numpy.pi / 4.0, numpy.pi / 4.0, ntimes)
    phasecentre = SkyCoord(ra=+0.0 * u.deg, dec=-40.0 * u.deg, frame='icrs', equinox='J2000')

    lprint("****** Visibility creation ******")
    # Create the empty BlockVisibility's and persist these on the cluster
    tmp_bvis_list = simulate_list_arlexecute_workflow(
        'LOWBD2',
        frequency=frequency,
        channel_bandwidth=channel_bandwidth,
        times=times,
        phasecentre=phasecentre,
        order=order,
        format='blockvis',
        rmax=rmax)
    tmp_vis_list = [
        arlexecute.execute(convert_blockvisibility_to_visibility)(bv)
        for bv in tmp_bvis_list
    ]
    # NOTE(review): arlexecute.client is used directly here and below, which
    # presumably requires a dask client (use_dask=True) - confirm.
    tmp_vis_list = arlexecute.client.compute(tmp_vis_list, sync=True)
    vis_list = arlexecute.gather(tmp_vis_list)

    # Diagnostic histograms of the first visibility's w and uv distance.
    # NOTE(review): plt.show() may block with an interactive backend - confirm
    # this is wanted inside a timing run.
    import matplotlib.pyplot as plt
    plt.clf()
    plt.hist(vis_list[0].w, bins=100)
    plt.title('Histogram of w samples: rms=%.1f (wavelengths)' % numpy.std(vis_list[0].w))
    plt.xlabel('W (wavelengths)')
    plt.show()
    plt.clf()
    plt.hist(vis_list[0].uvdist, bins=100)
    plt.title('Histogram of uvdistance samples')
    plt.xlabel('UV Distance (wavelengths)')
    plt.show()

    arlexecute.client.cancel(tmp_vis_list)
    future_vis_list = arlexecute.scatter(vis_list)

    # Find the best imaging parameters but don't bring the vis_list back here
    print("****** Finding wide field parameters ******")
    future_advice = [
        arlexecute.execute(advise_wide_field)(
            v,
            guard_band_image=6.0,
            delA=0.1,
            facets=facets,
            wprojection_planes=wprojection_planes,
            oversampling_synthesised_beam=4.0)
        for v in future_vis_list
    ]

    future_advice = arlexecute.compute(future_advice)
    # Only the advice for the last visibility in the list is used.
    advice = arlexecute.client.gather(future_advice)[-1]
    arlexecute.client.cancel(future_advice)

    # Deconvolution via sub-images requires 2^n
    npixel = advice['npixels2']
    results['npixel'] = npixel
    cellsize = advice['cellsize']
    results['cellsize'] = cellsize
    lprint("Image will have %d by %d pixels, cellsize = %.6f rad" % (npixel, npixel, cellsize))

    # Create an empty model image
    tmp_model_list = [
        arlexecute.execute(create_image)(
            npixel=npixel,
            cellsize=cellsize,
            frequency=[frequency[f]],
            channel_bandwidth=[channel_bandwidth[f]],
            phasecentre=phasecentre,
            polarisation_frame=PolarisationFrame("stokesI"))
        for f, freq in enumerate(frequency)
    ]
    model_list = arlexecute.compute(tmp_model_list, sync=True)
    future_model_list = arlexecute.scatter(model_list)

    lprint("****** Setting up imaging parameters ******")
    # Now set up the imaging parameters
    template_model = create_image(
        npixel=npixel,
        cellsize=cellsize,
        frequency=[frequency[centre]],
        phasecentre=phasecentre,
        channel_bandwidth=[channel_bandwidth[centre]],
        polarisation_frame=PolarisationFrame("stokesI"))
    # Default gridding/convolution function; replaced below for 'wprojection'.
    gcfcf = [create_pswf_convolutionfunction(template_model)]

    if context == 'timeslice':
        vis_slices = ntimes
        lprint("Using timeslice with %d slices" % vis_slices)
    elif context == '2d':
        vis_slices = 1
    elif context == "wprojection":
        wstep = advice['wstep']
        nw = advice['wprojection_planes']
        vis_slices = 1
        support = advice['nwpixels']
        # Overwrites the value recorded from the argument with the advised one.
        results['wprojection_planes'] = nw
        lprint("****** Starting W projection kernel creation ******")
        lprint("Using wprojection with %d planes with wstep %.1f wavelengths" % (nw, wstep))
        lprint("Support of wprojection = %d pixels" % support)
        gcfcf = [
            create_awterm_convolutionfunction(template_model,
                                              nw=nw,
                                              wstep=wstep,
                                              oversampling=4,
                                              support=support,
                                              use_aaf=True)
        ]
        lprint("Size of W projection gcf, cf = %.2E bytes" % get_size(gcfcf))
    else:
        # Any unrecognised context falls back to w stacking.
        context = 'wstack'
        vis_slices = advice['vis_slices']
        lprint("Using wstack with %d slices" % vis_slices)

    gcfcf = arlexecute.scatter(gcfcf, broadcast=True)

    results['vis_slices'] = vis_slices

    # Make a skymodel from gleam, with bright sources as components and weak sources in an image
    lprint("****** Starting GLEAM skymodel creation ******")
    future_skymodel_list = [
        arlexecute.execute(create_low_test_skymodel_from_gleam)(
            npixel=npixel,
            cellsize=cellsize,
            frequency=[frequency[f]],
            phasecentre=phasecentre,
            polarisation_frame=PolarisationFrame("stokesI"),
            flux_limit=flux_limit,
            flux_threshold=dft_threshold,
            flux_max=5.0)
        for f, freq in enumerate(frequency)
    ]

    # We use predict_skymodel so that we can use skycomponents as well as images
    lprint("****** Starting GLEAM skymodel prediction ******")
    predicted_vis_list = [
        predict_skymodel_list_arlexecute_workflow(future_vis_list[f],
                                                  [future_skymodel_list[f]],
                                                  context=context,
                                                  vis_slices=vis_slices,
                                                  facets=facets,
                                                  gcfcf=gcfcf)[0]
        for f, freq in enumerate(frequency)
    ]

    # Corrupt the visibility for the GLEAM model
    lprint("****** Visibility corruption ******")
    tmp_corrupted_vis_list = corrupt_list_arlexecute_workflow(
        predicted_vis_list, phase_error=1.0, seed=seed)

    lprint("****** Weighting and tapering ******")
    tmp_corrupted_vis_list = weight_list_arlexecute_workflow(
        tmp_corrupted_vis_list, future_model_list)
    tmp_corrupted_vis_list = taper_list_arlexecute_workflow(
        tmp_corrupted_vis_list, 0.003 * 750.0 / rmax)
    tmp_corrupted_vis_list = arlexecute.compute(tmp_corrupted_vis_list,
                                                sync=True)

    corrupted_vis_list = arlexecute.gather(tmp_corrupted_vis_list)
    # arlexecute.client.cancel(predicted_vis_list)
    arlexecute.client.cancel(tmp_corrupted_vis_list)
    future_corrupted_vis_list = arlexecute.scatter(corrupted_vis_list)

    # At this point the only futures are of scatter'ed data so no repeated calculations should be
    # incurred.
    lprint("****** Starting dirty image calculation ******")
    start = time.time()
    dirty_list = invert_list_arlexecute_workflow(
        future_corrupted_vis_list,
        future_model_list,
        vis_slices=vis_slices,
        context=context,
        facets=facets,
        use_serial_invert=use_serial_imaging,
        gcfcf=gcfcf)
    results['size invert graph'] = get_size(dirty_list)
    lprint('Size of dirty graph is %.3E bytes' % (results['size invert graph']))
    end = time.time()
    # Note: this interval also includes the get_size() call and the log line above.
    results['time invert graph'] = end - start
    lprint("Construction of invert graph took %.3f seconds" % (end - start))

    start = time.time()
    dirty, sumwt = arlexecute.compute(dirty_list, sync=True)[centre]
    end = time.time()
    results['time invert'] = end - start
    lprint("Dirty image invert took %.3f seconds" % (end - start))
    lprint("Maximum in dirty image is %f, sumwt is %s" %
           (numpy.max(numpy.abs(dirty.data)), str(sumwt)))
    qa = qa_image(dirty)
    results['dirty_max'] = qa.data['max']
    results['dirty_min'] = qa.data['min']
    if write_fits:
        export_image_to_fits(
            dirty, "pipelines_arlexecute_timings-%s-dirty.fits" % context)

    lprint("****** Starting prediction ******")
    start = time.time()
    tmp_vis_list = predict_list_arlexecute_workflow(
        future_corrupted_vis_list,
        future_model_list,
        vis_slices=vis_slices,
        context=context,
        facets=facets,
        use_serial_predict=use_serial_imaging,
        gcfcf=gcfcf)
    # The computed value is not used afterwards; the call is made for its timing.
    result = arlexecute.compute(tmp_vis_list, sync=True)
    # arlexecute.client.cancel(tmp_vis_list)
    end = time.time()
    results['time predict'] = end - start
    lprint("Predict took %.3f seconds" % (end - start))

    # Create the ICAL pipeline to run major cycles, starting selfcal at cycle 1. A global solution across all
    # frequencies (i.e. Visibilities) is performed.
    print("Using subimage clean")
    deconvolve_facets = 8
    deconvolve_overlap = 16
    deconvolve_taper = 'tukey'

    lprint("****** Starting ICAL graph creation ******")

    controls = create_calibration_controls()

    controls['T']['first_selfcal'] = 1
    controls['T']['timescale'] = 'auto'

    start = time.time()
    ical_list = ical_list_arlexecute_workflow(
        future_corrupted_vis_list,
        model_imagelist=future_model_list,
        context=context,
        vis_slices=vis_slices,
        scales=[0, 3, 10],
        algorithm='mmclean',
        nmoment=3,
        niter=1000,
        fractional_threshold=0.1,
        threshold=0.01,
        nmajor=nmajor,
        gain=0.25,
        psf_support=64,
        deconvolve_facets=deconvolve_facets,
        deconvolve_overlap=deconvolve_overlap,
        deconvolve_taper=deconvolve_taper,
        timeslice='auto',
        global_solution=True,
        do_selfcal=True,
        calibration_context='T',
        controls=controls,
        use_serial_predict=use_serial_imaging,
        use_serial_invert=use_serial_imaging,
        use_serial_clean=use_serial_clean,
        gcfcf=gcfcf)

    results['size ICAL graph'] = get_size(ical_list)
    lprint('Size of ICAL graph is %.3E bytes' % results['size ICAL graph'])
    end = time.time()
    # Note: this interval also includes the get_size() call and the log line above.
    results['time ICAL graph'] = end - start
    lprint("Construction of ICAL graph took %.3f seconds" % (end - start))

    print("Current objects on cluster: ")
    pp.pprint(arlexecute.client.who_has())
    #
    # Execute the graph
    lprint("****** Executing ICAL graph ******")
    start = time.time()
    deconvolved, residual, restored, gaintables = arlexecute.compute(ical_list, sync=True)
    end = time.time()
    results['time ICAL'] = end - start
    lprint("ICAL graph execution took %.3f seconds" % (end - start))
    qa = qa_image(deconvolved[centre])
    results['deconvolved_max'] = qa.data['max']
    results['deconvolved_min'] = qa.data['min']
    deconvolved_cube = image_gather_channels(deconvolved)
    if write_fits:
        export_image_to_fits(
            deconvolved_cube,
            "pipelines_arlexecute_timings-%s-ical_deconvolved.fits" % context)

    # Each residual entry is indexed with [0] to reach the image before QA.
    qa = qa_image(residual[centre][0])
    results['residual_max'] = qa.data['max']
    results['residual_min'] = qa.data['min']
    residual_cube = remove_sumwt(residual)
    residual_cube = image_gather_channels(residual_cube)
    if write_fits:
        export_image_to_fits(
            residual_cube,
            "pipelines_arlexecute_timings-%s-ical_residual.fits" % context)

    qa = qa_image(restored[centre])
    results['restored_max'] = qa.data['max']
    results['restored_min'] = qa.data['min']
    restored_cube = image_gather_channels(restored)
    if write_fits:
        export_image_to_fits(
            restored_cube,
            "pipelines_arlexecute_timings-%s-ical_restored.fits" % context)
    #
    arlexecute.close()

    end_all = time.time()
    results['time overall'] = end_all - start_all

    lprint("At end, results are:")
    results = sort_dict(results)
    lprint(results)

    return results
def tearDown(self):
    """Close the ``arlexecute`` client on a best-effort basis.

    Ordinary errors raised while closing are deliberately ignored so that a
    failed shutdown never turns a finished test into an error.
    ``KeyboardInterrupt`` and ``SystemExit`` are no longer swallowed, so the
    test run can still be interrupted during shutdown.
    """
    try:
        arlexecute.close()
    except Exception:
        # Best-effort cleanup: a failure to close must not fail the test.
        pass
frequency=[frequency[f]], channel_bandwidth=[channel_bandwidth[f]], cellsize=cellsize, phasecentre=phasecentre, polarisation_frame=PolarisationFrame("stokesI"), flux_limit=1.0, applybeam=True) for f, freq in enumerate(frequency)] print('About to make GLEAM model') gleam_model = arlexecute.persist(gleam_model) print('About to run predict to get predicted visibility') vis_list = arlexecute.scatter(vis_list) predicted_vislist = predict_list_arlexecute_workflow(vis_list, gleam_model, context='wstack', vis_slices=vis_slices) predicted_vislist = arlexecute.persist(predicted_vislist) corrupted_vislist = corrupt_list_arlexecute_workflow(predicted_vislist, phase_error=1.0) print('About to run corrupt to get corrupted visibility') corrupted_vislist = arlexecute.persist(corrupted_vislist) corrupted_vislist = arlexecute.compute(corrupted_vislist, sync=True) export_blockvisibility_to_hdf5(corrupted_vislist, 'gleam_simulation_vislist.hdf') gleam_model = arlexecute.compute(gleam_model, sync=True) gleam_skymodel = SkyModel(image=gleam_model) export_skymodel_to_hdf5(gleam_skymodel, 'gleam_simulation_skymodel.hdf') arlexecute.close()
def tearDown(self):
    """Close the ``arlexecute`` client once the test has finished.

    Any error raised by ``arlexecute.close()`` propagates to the test runner.
    """
    arlexecute.close()
def trial_case(results, seed=180555, context='wstack', nworkers=8, threads_per_worker=1, memory=8,
               processes=True, order='frequency', nfreqwin=7, ntimes=3, rmax=750.0,
               facets=1, wprojection_planes=1, use_dask=True, use_serial=False):
    """ Single trial for performance-timings

    Simulates visibilities from GLEAM including phase errors, makes a PSF and a
    dirty image, then builds and executes the ICAL pipeline, timing each stage.
    FITS images of the smoothed GLEAM model and of the ICAL outputs for the centre
    channel are always written to the current directory.

    The following keys are written into the results dictionary by this function:

        'seed', 'context', 'hostname', 'git_hash', 'epoch': identification of the run
        'nworkers', 'threads_per_worker', 'processes', 'memory', 'order', 'nfreqwin',
        'ntimes', 'rmax', 'facets', 'wprojection_planes', 'use_dask':
            copies of the corresponding arguments
        'nnodes', number of unique entries returned by findNodes (1 without dask)
        'vis_slices', number of visibility slices chosen for the imaging context
        'cellsize', 'npixel': taken from the advice returned by advise_wide_field
        'time create gleam', time to compute, smooth and export the GLEAM model (s)
        'time predict', time to execute the GLEAM prediction graph (s)
        'time corrupt', time to execute the corruption graph and scatter the result (s)
        'time psf invert', time to make the PSF (s)
        'time invert', time to make the dirty image (s)
        'time ICAL graph', time to create the ICAL graph (s)
        'time ICAL', time to execute the ICAL graph (s)
        'psf_max', 'psf_min',
        'dirty_max', 'dirty_min',
        'deconvolved_max', 'deconvolved_min',
        'residual_max', 'residual_min',
        'restored_max', 'restored_min': QA values for the centre channel
        'time overall', overall execution time (s)

    :param results: Initial state of the results dictionary; updated in place
    :param seed: Random number seed (used to seed numpy.random)
    :param context: Type of imaging context: '2d'|'timeslice'|'wstack'.
        Any other value is replaced by 'wstack'.
    :param nworkers: Number of dask workers requested from get_dask_Client
    :param threads_per_worker: Number of threads per worker
    :param memory: Per-worker memory limit; multiplied by 1024**3 before being passed on
    :param processes: Only recorded in results; not passed to get_dask_Client
    :param order: Passed to simulate_list_arlexecute_workflow
    :param nfreqwin: Number of frequency windows (points between 0.8e8 and 1.2e8)
    :param ntimes: Number of times (points between -pi/3 and +pi/3)
    :param rmax: Passed to simulate_list_arlexecute_workflow
    :param facets: Number of facets, passed to the advice and invert calls
    :param wprojection_planes: Recorded in results. NOTE(review): the value is reset
        to 1 before advise_wide_field is evaluated, so the argument does not reach it.
    :param use_dask: Use dask or immediate evaluation
    :param use_serial: NOTE(review): not referenced anywhere in the body
    :return: results dictionary (the same object that was passed in)
    """
    numpy.random.seed(seed)
    results['seed'] = seed

    start_all = time.time()

    results['context'] = context
    results['hostname'] = socket.gethostname()
    results['git_hash'] = git_hash()
    results['epoch'] = time.strftime("%Y-%m-%d %H:%M:%S")

    # NOTE(review): zerow is assigned but never read in this function.
    zerow = False
    print("Context is %s" % context)

    results['nworkers'] = nworkers
    results['threads_per_worker'] = threads_per_worker
    results['processes'] = processes
    results['memory'] = memory
    results['order'] = order
    results['nfreqwin'] = nfreqwin
    results['ntimes'] = ntimes
    results['rmax'] = rmax
    results['facets'] = facets
    results['wprojection_planes'] = wprojection_planes
    results['use_dask'] = use_dask

    print("At start, configuration is {0!r}".format(results))

    # Parameters determining scale
    frequency = numpy.linspace(0.8e8, 1.2e8, nfreqwin)
    # Index of the middle frequency window; used to pick the image that is QA'ed.
    centre = nfreqwin // 2
    if nfreqwin > 1:
        channel_bandwidth = numpy.array(nfreqwin * [frequency[1] - frequency[0]])
    else:
        channel_bandwidth = numpy.array([1e6])
    times = numpy.linspace(-numpy.pi / 3.0, numpy.pi / 3.0, ntimes)

    phasecentre = SkyCoord(ra=+30.0 * u.deg, dec=-60.0 * u.deg, frame='icrs', equinox='J2000')

    if use_dask:
        client = get_dask_Client(threads_per_worker=threads_per_worker,
                                 memory_limit=memory * 1024 * 1024 * 1024,
                                 n_workers=nworkers)
        arlexecute.set_client(client)
        nodes = findNodes(arlexecute.client)
        unodes = list(numpy.unique(nodes))
        results['nnodes'] = len(unodes)
        print("Defined %d workers on %d nodes" % (nworkers, results['nnodes']))
        print("Workers are: %s" % str(nodes))
    else:
        arlexecute.set_client(use_dask=use_dask)
        results['nnodes'] = 1
        unodes = None

    vis_list = simulate_list_arlexecute_workflow(
        'LOWBD2',
        frequency=frequency,
        channel_bandwidth=channel_bandwidth,
        times=times,
        phasecentre=phasecentre,
        order=order,
        format='blockvis',
        rmax=rmax)

    print("****** Visibility creation ******")
    vis_list = arlexecute.persist(vis_list)

    # Find the best imaging parameters but don't bring the vis_list back here
    def get_wf(bv):
        v = convert_blockvisibility_to_visibility(bv)
        # wprojection_planes is read from the enclosing scope when this runs,
        # i.e. after the assignment to 1 just below.
        return advise_wide_field(v,
                                 guard_band_image=6.0,
                                 delA=0.02,
                                 facets=facets,
                                 wprojection_planes=wprojection_planes,
                                 oversampling_synthesised_beam=4.0)

    wprojection_planes = 1
    advice = arlexecute.compute(arlexecute.execute(get_wf)(vis_list[0]), sync=True)

    npixel = advice['npixels2']
    cellsize = advice['cellsize']

    if context == 'timeslice':
        vis_slices = ntimes
        print("Using timeslice with %d slices" % vis_slices)
    elif context == '2d':
        vis_slices = 1
    else:
        # Any unrecognised context falls back to w stacking, using five times
        # the advised number of slices.
        context = 'wstack'
        vis_slices = 5 * advice['vis_slices']
        print("Using wstack with %d slices" % vis_slices)

    results['vis_slices'] = vis_slices
    results['cellsize'] = cellsize
    results['npixel'] = npixel

    gleam_model_list = [
        arlexecute.execute(create_low_test_image_from_gleam)(
            npixel=npixel,
            frequency=[frequency[f]],
            channel_bandwidth=[channel_bandwidth[f]],
            cellsize=cellsize,
            phasecentre=phasecentre,
            polarisation_frame=PolarisationFrame("stokesI"),
            flux_limit=0.3,
            applybeam=True)
        for f, freq in enumerate(frequency)
    ]

    start = time.time()
    print("****** Starting GLEAM model creation ******")
    gleam_model_list = arlexecute.compute(gleam_model_list, sync=True)
    cmodel = smooth_image(gleam_model_list[centre])
    export_image_to_fits(cmodel, "pipelines-timings-arlexecute-gleam_cmodel.fits")
    end = time.time()
    # Note: this interval also includes the smoothing and the FITS export above.
    results['time create gleam'] = end - start
    print("Creating GLEAM model took %.2f seconds" % (end - start))

    gleam_model_list = arlexecute.scatter(gleam_model_list)
    vis_list = predict_list_arlexecute_workflow(vis_list,
                                                gleam_model_list,
                                                vis_slices=vis_slices,
                                                context=context)
    start = time.time()
    print("****** Starting GLEAM model visibility prediction ******")
    vis_list = arlexecute.compute(vis_list, sync=True)
    end = time.time()
    results['time predict'] = end - start
    print("GLEAM model Visibility prediction took %.2f seconds" % (end - start))

    # Corrupt the visibility for the GLEAM model
    print("****** Visibility corruption ******")
    vis_list = corrupt_list_arlexecute_workflow(vis_list, phase_error=1.0)
    start = time.time()
    vis_list = arlexecute.compute(vis_list, sync=True)
    vis_list = arlexecute.scatter(vis_list)
    end = time.time()
    results['time corrupt'] = end - start
    print("Visibility corruption took %.2f seconds" % (end - start))

    # Create an empty model image
    model_list = [
        arlexecute.execute(create_image_from_visibility)(
            vis_list[f],
            npixel=npixel,
            cellsize=cellsize,
            frequency=[frequency[f]],
            channel_bandwidth=[channel_bandwidth[f]],
            polarisation_frame=PolarisationFrame("stokesI"))
        for f, freq in enumerate(frequency)
    ]
    model_list = arlexecute.compute(model_list, sync=True)
    model_list = arlexecute.scatter(model_list)

    psf_list = invert_list_arlexecute_workflow(vis_list,
                                               model_list,
                                               vis_slices=vis_slices,
                                               context=context,
                                               facets=facets,
                                               dopsf=True)
    start = time.time()
    print("****** Starting PSF calculation ******")
    psf, sumwt = arlexecute.compute(psf_list, sync=True)[centre]
    end = time.time()
    results['time psf invert'] = end - start
    print("PSF invert took %.2f seconds" % (end - start))

    results['psf_max'] = qa_image(psf).data['max']
    results['psf_min'] = qa_image(psf).data['min']

    dirty_list = invert_list_arlexecute_workflow(vis_list,
                                                 model_list,
                                                 vis_slices=vis_slices,
                                                 context=context,
                                                 facets=facets)
    start = time.time()
    print("****** Starting dirty image calculation ******")
    dirty, sumwt = arlexecute.compute(dirty_list, sync=True)[centre]
    end = time.time()
    results['time invert'] = end - start
    print("Dirty image invert took %.2f seconds" % (end - start))
    print("Maximum in dirty image is ", numpy.max(numpy.abs(dirty.data)), ", sumwt is ", sumwt)
    qa = qa_image(dirty)
    results['dirty_max'] = qa.data['max']
    results['dirty_min'] = qa.data['min']

    # Create the ICAL pipeline to run 5 major cycles, starting selfcal at cycle 1. A global solution across all
    # frequencies (i.e. Visibilities) is performed.
    # NOTE(review): the call below passes global_solution=False, which appears to
    # contradict the sentence above - confirm which is intended.
    start = time.time()
    print("****** Starting ICAL ******")

    controls = create_calibration_controls()

    controls['T']['first_selfcal'] = 1
    controls['G']['first_selfcal'] = 3
    controls['B']['first_selfcal'] = 4

    controls['T']['timescale'] = 'auto'
    controls['G']['timescale'] = 'auto'
    controls['B']['timescale'] = 1e5

    # Choose the clean algorithm and number of moments from the number of windows.
    if nfreqwin > 6:
        nmoment = 3
        algorithm = 'mmclean'
    elif nfreqwin > 2:
        nmoment = 2
        algorithm = 'mmclean'
    else:
        nmoment = 1
        algorithm = 'msclean'

    # Restart the clock so that only graph construction is timed.
    start = time.time()
    # NOTE(review): context is hard-coded to 'wstack' here rather than using the
    # context selected above - confirm.
    ical_list = ical_list_arlexecute_workflow(vis_list,
                                              model_imagelist=model_list,
                                              context='wstack',
                                              calibration_context='TG',
                                              controls=controls,
                                              scales=[0, 3, 10],
                                              algorithm=algorithm,
                                              nmoment=nmoment,
                                              niter=1000,
                                              fractional_threshold=0.1,
                                              threshold=0.1,
                                              nmajor=5,
                                              gain=0.25,
                                              vis_slices=vis_slices,
                                              timeslice='auto',
                                              global_solution=False,
                                              psf_support=64,
                                              do_selfcal=True)

    end = time.time()
    results['time ICAL graph'] = end - start
    print("Construction of ICAL graph took %.2f seconds" % (end - start))

    # Execute the graph
    start = time.time()
    result = arlexecute.compute(ical_list, sync=True)
    deconvolved, residual, restored = result
    end = time.time()
    results['time ICAL'] = end - start
    print("ICAL graph execution took %.2f seconds" % (end - start))
    qa = qa_image(deconvolved[centre])
    results['deconvolved_max'] = qa.data['max']
    results['deconvolved_min'] = qa.data['min']
    export_image_to_fits(deconvolved[centre], "pipelines-timings-arlexecute-ical_deconvolved.fits")

    # Each residual entry is indexed with [0] to reach the image before QA.
    qa = qa_image(residual[centre][0])
    results['residual_max'] = qa.data['max']
    results['residual_min'] = qa.data['min']
    export_image_to_fits(residual[centre][0], "pipelines-timings-arlexecute-ical_residual.fits")

    qa = qa_image(restored[centre])
    results['restored_max'] = qa.data['max']
    results['restored_min'] = qa.data['min']
    export_image_to_fits(restored[centre], "pipelines-timings-arlexecute-ical_restored.fits")
    #
    arlexecute.close()

    end_all = time.time()
    results['time overall'] = end_all - start_all

    print("At end, results are {0!r}".format(results))

    return results
def test_mpccal_MPCCAL_manysources_subimages(self):
    """Run MPCCAL with sub-image (faceted) deconvolution and check the brightest component.

    The MPCCAL workflow is executed on the non-isoplanatic visibility, the
    result is restored, and the residual, restored and deconvolved images are
    exported. The brightest recovered component must be 'Segment 2' with the
    reference flux. Finally the solved gain tables are gridded onto a screen,
    which is exported together with its weights.
    """
    self.actualSetup()

    empty_model = create_empty_image_like(self.theta_list[0].image)

    # The progress callback is only passed on when dask is not in use.
    progress = None if arlexecute.using_dask else self.progress

    future_vis = arlexecute.scatter(self.all_skymodel_noniso_vis)
    future_model = arlexecute.scatter(empty_model)
    future_theta_list = arlexecute.scatter(self.theta_list)
    mpccal_graph = mpccal_skymodel_list_arlexecute_workflow(
        future_vis,
        future_model,
        future_theta_list,
        mpccal_progress=progress,
        nmajor=5,
        context='2d',
        algorithm='hogbom',
        scales=[0, 3, 10],
        fractional_threshold=0.3,
        threshold=0.2,
        gain=0.1,
        niter=1000,
        psf_support=256,
        deconvolve_facets=8,
        deconvolve_overlap=8,
        deconvolve_taper='tukey')

    self.theta_list, residual = arlexecute.compute(mpccal_graph, sync=True)

    combined_model = calculate_skymodel_equivalent_image(self.theta_list)

    psf_obs = invert_list_arlexecute_workflow([self.all_skymodel_noniso_vis],
                                              [empty_model],
                                              context='2d',
                                              dopsf=True)
    restore_graph = restore_list_arlexecute_workflow([combined_model],
                                                     psf_obs,
                                                     [(residual, 0.0)])
    restored_list = arlexecute.compute(restore_graph, sync=True)

    export_image_to_fits(residual,
                         arl_path('test_results/test_mpccal_no_edge_residual.fits'))
    export_image_to_fits(restored_list[0],
                         arl_path('test_results/test_mpccal_no_edge_restored.fits'))
    export_image_to_fits(combined_model,
                         arl_path('test_results/test_mpccal_no_edge_deconvolved.fits'))

    found_components = find_skycomponents(restored_list[0],
                                          fwhm=2,
                                          threshold=0.32,
                                          npixels=12)

    # Order the components from brightest to faintest by their peak flux.
    found_components = sorted(found_components,
                              key=lambda comp: numpy.max(comp.flux),
                              reverse=True)

    brightest = found_components[0]
    assert brightest.name == 'Segment 2', brightest.name
    assert numpy.abs(brightest.flux[0, 0] - 5.069969928419157) < 1e-7, \
        brightest.flux[0, 0]

    screen_template = create_empty_image_like(self.screen)
    solved_gaintables = [theta.gaintable for theta in self.theta_list]
    gridded_screen, screen_weights = grid_gaintable_to_screen(
        self.all_skymodel_noniso_blockvis, solved_gaintables, screen_template)
    export_image_to_fits(gridded_screen,
                         arl_path('test_results/test_mpccal_no_edge_screen.fits'))
    export_image_to_fits(screen_weights,
                         arl_path('test_results/test_mpccal_no_edge_screenweights.fits'))

    arlexecute.close()
def trial_case(results, seed=180555, context='wstack', nworkers=8, threads_per_worker=1, memory=8,
               processes=True, order='frequency', nfreqwin=7, ntimes=3, rmax=750.0,
               facets=1, wprojection_planes=1, use_dask=True, use_serial_imaging=False,
               flux_limit=0.3, nmajor=5, dft_threshold=1.0):
    """ Single trial for performance-timings

    Simulates visibilities from GLEAM including phase errors
    Makes PSF and dirty image
    Runs ICAL pipeline

    The results are written into (and returned as) the ``results`` dictionary.
    Keys set by this function:

    'seed', Random number seed
    'context', imaging context as passed in
    'hostname', 'git_hash', 'epoch', provenance of the run
    'nworkers', 'threads_per_worker', 'processes', 'memory', 'use_dask',
    'nnodes', set to 1 when use_dask is False (otherwise left as supplied)
    'order', Ordering of data_models
    'nfreqwin', Number of frequency windows in simulation
    'ntimes', Number of hour angles in simulation
    'rmax', Maximum radius of stations used in simulation (m)
    'facets', Number of facets in deconvolution and imaging
    'wprojection_planes', Number of wprojection planes (overwritten with the
        advised value when context is 'wprojection')
    'dft threshold', value of dft_threshold
    'vis_slices', Number of visibility slices (per Visibility)
    'npixel', Number of pixels in image
    'cellsize', Cellsize in radians
    'time weight', time to weight and taper the visibilities (s)
    'time create wprojection', time to make W projection kernels (only for
        context 'wprojection')
    'time create gleam', time to create the GLEAM skymodels
    'time predict gleam', time to predict visibilities from the skymodels
    'time psf invert', time to make PSF
    'time invert', time to make dirty image
    'time corrupt', time to corrupt the visibilities
    'time ICAL graph', time to create ICAL graph
    'time ICAL', time to execute ICAL graph
    'time overall', overall execution time (s)
    'psf_max', 'psf_min',
    'dirty_max', 'dirty_min',
    'deconvolved_max', 'deconvolved_min',
    'residual_max', 'residual_min',
    'restored_max', 'restored_min'

    :param results: Initial state (dictionary, updated in place)
    :param seed: Random number seed (used in gain simulations)
    :param context: Type of imaging context: '2d'|'timeslice'|'wstack'|'wprojection';
        any other value is treated as 'wstack'
    :param nworkers: Number of dask workers to use
    :param threads_per_worker: Number of threads per worker. The dask client is
        created with processes enabled only when this equals 1.
    :param memory: Memory limit per worker; multiplied by 1024**3 before being
        passed as memory_limit to get_dask_Client
    :param processes: Recorded in the results only; it is not used to configure
        the dask client (see threads_per_worker)
    :param order: See simulate_list_arlexecute_workflow
    :param nfreqwin: See simulate_list_arlexecute_workflow
    :param ntimes: See simulate_list_arlexecute_workflow
    :param rmax: See simulate_list_arlexecute_workflow
    :param facets: Number of facets to use
    :param wprojection_planes: Number of wprojection planes to use
    :param use_dask: Use dask or immediate evaluation
    :param use_serial_imaging: Passed as use_serial_invert / use_serial_predict
        to the imaging and ICAL workflows
    :param flux_limit: flux_limit passed to create_low_test_skymodel_from_gleam
    :param nmajor: Number of ICAL major cycles
    :param dft_threshold: flux_threshold passed to create_low_test_skymodel_from_gleam
    :return: results dictionary
    """
    if use_dask:
        client = get_dask_Client(threads_per_worker=threads_per_worker,
                                 processes=(threads_per_worker == 1),
                                 memory_limit=memory * 1024 * 1024 * 1024,
                                 n_workers=nworkers)
        arlexecute.set_client(client)
        nodes = findNodes(arlexecute.client)
        print("Defined %d workers on %d nodes" % (nworkers, len(nodes)))
        print("Workers are: %s" % str(nodes))
    else:
        arlexecute.set_client(use_dask=use_dask)
        results['nnodes'] = 1

    def init_logging():
        logging.basicConfig(filename='pipelines-arlexecute-timings.log',
                            filemode='a',
                            format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
                            datefmt='%H:%M:%S',
                            level=logging.INFO)

    init_logging()
    log = logging.getLogger()

    # Initialise logging on the workers. This appears to only work using the process scheduler.
    arlexecute.run(init_logging)

    def lprint(s):
        # Send the message both to the log file and to stdout
        log.info(s)
        print(s)

    lprint("Starting pipelines-arlexecute-timings")

    numpy.random.seed(seed)
    results['seed'] = seed

    start_all = time.time()

    results['context'] = context
    results['hostname'] = socket.gethostname()
    results['git_hash'] = git_hash()
    results['epoch'] = time.strftime("%Y-%m-%d %H:%M:%S")

    lprint("Context is %s" % context)

    results['nworkers'] = nworkers
    results['threads_per_worker'] = threads_per_worker
    results['processes'] = processes
    results['memory'] = memory
    results['order'] = order
    results['nfreqwin'] = nfreqwin
    results['ntimes'] = ntimes
    results['rmax'] = rmax
    results['facets'] = facets
    results['wprojection_planes'] = wprojection_planes
    results['dft threshold'] = dft_threshold

    results['use_dask'] = use_dask

    lprint("At start, configuration is:")
    lprint(results)

    # Parameters determining scale
    frequency = numpy.linspace(1.0e8, 1.2e8, nfreqwin)
    centre = nfreqwin // 2
    if nfreqwin > 1:
        channel_bandwidth = numpy.array(nfreqwin * [frequency[1] - frequency[0]])
    else:
        channel_bandwidth = numpy.array([1e6])

    times = numpy.linspace(-numpy.pi / 4.0, numpy.pi / 4.0, ntimes)
    phasecentre = SkyCoord(ra=+30.0 * u.deg, dec=-60.0 * u.deg, frame='icrs', equinox='J2000')

    bvis_list = simulate_list_arlexecute_workflow('LOWBD2',
                                                  frequency=frequency,
                                                  channel_bandwidth=channel_bandwidth,
                                                  times=times,
                                                  phasecentre=phasecentre,
                                                  order=order,
                                                  format='blockvis',
                                                  rmax=rmax)

    lprint("****** Visibility creation ******")
    bvis_list = arlexecute.compute(bvis_list, sync=True)

    # Wrap the conversion function itself (not its result) so that the
    # conversion is part of the graph, as for every other execute() call here.
    vis_list = [arlexecute.execute(convert_blockvisibility_to_visibility)(bv) for bv in bvis_list]
    vis_list = arlexecute.compute(vis_list, sync=True)

    # Find the best imaging parameters but don't bring the vis_list back here
    def get_wf(v):
        return advise_wide_field(v, guard_band_image=6.0,
                                 delA=0.1,
                                 facets=facets,
                                 wprojection_planes=wprojection_planes,
                                 oversampling_synthesised_beam=4.0)

    advice = arlexecute.compute(arlexecute.execute(get_wf)(vis_list[-1]), sync=True)

    # Deconvolution via sub-images requires 2^n
    npixel = advice['npixels2']
    results['npixel'] = npixel
    cellsize = advice['cellsize']
    results['cellsize'] = cellsize
    lprint("Image will have %d by %d pixels, cellsize = %.6f rad" % (npixel, npixel, cellsize))

    # Create an empty model image
    model_list = [arlexecute.execute(create_image_from_visibility)
                  (vis_list[f],
                   npixel=npixel, cellsize=cellsize,
                   frequency=[frequency[f]],
                   channel_bandwidth=[channel_bandwidth[f]],
                   polarisation_frame=PolarisationFrame("stokesI"))
                  for f in range(nfreqwin)]
    model_list = arlexecute.compute(model_list, sync=True)
    model_list = arlexecute.scatter(model_list)

    start = time.time()
    vis_list = weight_list_arlexecute_workflow(vis_list, model_list)
    vis_list = taper_list_arlexecute_workflow(vis_list, 0.003 * 750.0 / rmax)
    print("****** Starting weighting and tapering ******")
    vis_list = arlexecute.compute(vis_list, sync=True)
    end = time.time()
    results['time weight'] = end - start
    print("Weighting took %.3f seconds" % (end - start))
    vis_list = arlexecute.scatter(vis_list)

    # Now set up the imaging parameters. One (absent) convolution function per
    # frequency window unless W projection is requested.
    gcfcf_list = [None] * nfreqwin

    if context == 'timeslice':
        vis_slices = ntimes
        lprint("Using timeslice with %d slices" % vis_slices)
    elif context == '2d':
        vis_slices = 1
    elif context == "wprojection":
        wstep = advice['wstep']
        nw = advice['wprojection_planes']
        vis_slices = 1
        support = advice['nwpixels']
        results['wprojection_planes'] = nw
        lprint("Using wprojection with %d planes with wstep %.1f wavelengths" % (nw, wstep))

        start = time.time()
        lprint("****** Starting W projection kernel creation ******")
        gcfcf_list = [arlexecute.execute(create_awterm_convolutionfunction, nout=1)
                      (m, nw=nw, wstep=wstep, oversampling=8, support=support, use_aaf=True)
                      for m in model_list]
        gcfcf_list = arlexecute.compute(gcfcf_list, sync=True)
        end = time.time()
        results['time create wprojection'] = end - start
        lprint("Creating W projection kernel took %.3f seconds" % (end - start))
        cf_image = convert_convolutionfunction_to_image(gcfcf_list[centre][1])
        cf_image.data = numpy.real(cf_image.data)
        export_image_to_fits(cf_image, "pipelines-arlexecute-timings-wterm-cf.fits")
        gcfcf_list = arlexecute.scatter(gcfcf_list)
    else:
        # Any other context falls back to wstack. Note that results['context']
        # was recorded above and keeps the value that was passed in.
        context = 'wstack'
        vis_slices = advice['vis_slices']
        lprint("Using wstack with %d slices" % vis_slices)

    results['vis_slices'] = vis_slices

    # Make a skymodel from gleam, with bright sources as components and weak sources in an image
    lprint("****** Starting GLEAM skymodel creation ******")
    start = time.time()
    skymodel_list = [arlexecute.execute(create_low_test_skymodel_from_gleam)
                     (npixel=npixel, cellsize=cellsize, frequency=[frequency[f]],
                      phasecentre=phasecentre,
                      polarisation_frame=PolarisationFrame("stokesI"),
                      flux_limit=flux_limit,
                      flux_threshold=dft_threshold,
                      flux_max=5.0)
                     for f in range(nfreqwin)]
    skymodel_list = arlexecute.compute(skymodel_list, sync=True)
    end = time.time()
    lprint("GLEAM skymodel creation took %.3f seconds" % (end - start))
    results['time create gleam'] = end - start

    lprint("****** Starting GLEAM skymodel prediction ******")
    start = time.time()
    predicted_vis_list = [predict_skymodel_list_arlexecute_workflow(vis_list[f],
                                                                    [skymodel_list[f]],
                                                                    context=context,
                                                                    vis_slices=vis_slices,
                                                                    facets=facets,
                                                                    gcfcf=[gcfcf_list[f]])[0]
                          for f in range(nfreqwin)]
    predicted_vis_list = arlexecute.compute(predicted_vis_list, sync=True)
    end = time.time()
    lprint("GLEAM skymodel prediction took %.3f seconds" % (end - start))
    results['time predict gleam'] = end - start

    lprint("****** Starting psf image calculation ******")
    start = time.time()
    predicted_vis_list = arlexecute.scatter(predicted_vis_list)
    psf_list = invert_list_arlexecute_workflow(predicted_vis_list, model_list,
                                               vis_slices=vis_slices,
                                               dopsf=True,
                                               context=context,
                                               facets=facets,
                                               use_serial_invert=use_serial_imaging,
                                               gcfcf=gcfcf_list)
    # Only the centre frequency window is inspected and exported
    psf, sumwt = arlexecute.compute(psf_list, sync=True)[centre]
    end = time.time()
    results['time psf invert'] = end - start
    lprint("PSF invert took %.3f seconds" % (end - start))
    lprint("Maximum in psf image is %f, sumwt is %s" % (numpy.max(numpy.abs(psf.data)), str(sumwt)))
    qa = qa_image(psf)
    results['psf_max'] = qa.data['max']
    results['psf_min'] = qa.data['min']
    export_image_to_fits(psf, "pipelines-arlexecute-timings-%s-psf.fits" % context)

    # Create an empty model image, this time from the predicted visibilities
    model_list = [arlexecute.execute(create_image_from_visibility)
                  (predicted_vis_list[f],
                   npixel=npixel, cellsize=cellsize,
                   frequency=[frequency[f]],
                   channel_bandwidth=[channel_bandwidth[f]],
                   polarisation_frame=PolarisationFrame("stokesI"))
                  for f in range(nfreqwin)]
    model_list = arlexecute.compute(model_list, sync=True)
    model_list = arlexecute.scatter(model_list)

    lprint("****** Starting dirty image calculation ******")
    start = time.time()
    dirty_list = invert_list_arlexecute_workflow(predicted_vis_list, model_list,
                                                 vis_slices=vis_slices,
                                                 context=context,
                                                 facets=facets,
                                                 use_serial_invert=use_serial_imaging,
                                                 gcfcf=gcfcf_list)
    dirty, sumwt = arlexecute.compute(dirty_list, sync=True)[centre]
    end = time.time()
    results['time invert'] = end - start
    lprint("Dirty image invert took %.3f seconds" % (end - start))
    lprint("Maximum in dirty image is %f, sumwt is %s" % (numpy.max(numpy.abs(dirty.data)), str(sumwt)))
    qa = qa_image(dirty)
    results['dirty_max'] = qa.data['max']
    results['dirty_min'] = qa.data['min']
    export_image_to_fits(dirty, "pipelines-arlexecute-timings-%s-dirty.fits" % context)

    # Corrupt the visibility for the GLEAM model
    lprint("****** Visibility corruption ******")
    start = time.time()
    corrupted_vis_list = corrupt_list_arlexecute_workflow(predicted_vis_list, phase_error=1.0, seed=seed)
    corrupted_vis_list = arlexecute.compute(corrupted_vis_list, sync=True)
    end = time.time()
    results['time corrupt'] = end - start
    lprint("Visibility corruption took %.3f seconds" % (end - start))

    # Create the ICAL pipeline to run major cycles, starting selfcal at cycle 1. A global solution across all
    # frequencies (i.e. Visibilities) is performed.
    lprint("****** Starting ICAL ******")

    controls = create_calibration_controls()

    controls['T']['first_selfcal'] = 1
    controls['T']['timescale'] = 'auto'

    start = time.time()
    ical_list = ical_list_arlexecute_workflow(corrupted_vis_list,
                                              model_imagelist=model_list,
                                              context=context,
                                              vis_slices=vis_slices,
                                              scales=[0, 3, 10],
                                              algorithm='mmclean',
                                              nmoment=3,
                                              niter=1000,
                                              fractional_threshold=0.1,
                                              threshold=0.01,
                                              nmajor=nmajor,
                                              gain=0.25,
                                              psf_support=64,
                                              deconvolve_facets=8,
                                              deconvolve_overlap=32,
                                              deconvolve_taper='tukey',
                                              timeslice='auto',
                                              global_solution=True,
                                              do_selfcal=True,
                                              calibration_context='T',
                                              controls=controls,
                                              use_serial_predict=use_serial_imaging,
                                              use_serial_invert=use_serial_imaging,
                                              gcfcf=gcfcf_list)
    end = time.time()
    results['time ICAL graph'] = end - start
    lprint("Construction of ICAL graph took %.3f seconds" % (end - start))

    # Execute the graph
    start = time.time()
    result = arlexecute.compute(ical_list, sync=True)
    deconvolved, residual, restored, gaintables = result
    end = time.time()

    results['time ICAL'] = end - start
    lprint("ICAL graph execution took %.3f seconds" % (end - start))

    qa = qa_image(deconvolved[centre])
    results['deconvolved_max'] = qa.data['max']
    results['deconvolved_min'] = qa.data['min']
    deconvolved_cube = image_gather_channels(deconvolved)
    export_image_to_fits(deconvolved_cube, "pipelines-arlexecute-timings-%s-ical_deconvolved.fits" % context)

    # Each residual entry is indexed with [0] to obtain the image
    qa = qa_image(residual[centre][0])
    results['residual_max'] = qa.data['max']
    results['residual_min'] = qa.data['min']
    residual_cube = remove_sumwt(residual)
    residual_cube = image_gather_channels(residual_cube)
    export_image_to_fits(residual_cube, "pipelines-arlexecute-timings-%s-ical_residual.fits" % context)

    qa = qa_image(restored[centre])
    results['restored_max'] = qa.data['max']
    results['restored_min'] = qa.data['min']
    restored_cube = image_gather_channels(restored)
    export_image_to_fits(restored_cube, "pipelines-arlexecute-timings-%s-ical_restored.fits" % context)

    # NOTE(review): the arlexecute client is not closed here; presumably the
    # caller is responsible for closing it - confirm.

    end_all = time.time()
    results['time overall'] = end_all - start_all

    lprint("At end, results are:")
    lprint(results)

    return results