# imaging-multibeam.py — forked from transientskp/imaging (268 lines, 11.2 KB)
#!/usr/bin/env python
import os
import sys
import numpy
import math
import glob
import shutil
import lofar.parameterset
from multiprocessing import cpu_count
from multiprocessing.pool import ThreadPool
from tempfile import mkdtemp
from pyrap.tables import table
from utility import run_process
from utility import time_code
from utility import get_parset_subset
from utility import make_directory
from utility import copy_to_work_area
from utility import run_awimager
from utility import run_ndppp
from utility import run_calibrate_standalone
from utility import clear_calibrate_stand_alone_logs
from utility import find_bad_stations
from utility import strip_stations
from utility import limit_baselines
from utility import estimate_noise
from utility import make_mask
from utility import read_ms_list
# Every temporary write is directed at the node-local scratch area; TMPDIR
# is expected to be exported by the batch system (None if it is not set).
scratch = os.environ.get("TMPDIR")
if __name__ == "__main__":
    # Our single command line argument is a parset containing all
    # configuration information we'll need.
    input_parset = lofar.parameterset.parameterset(sys.argv[1])
    # We require `sbs_per_beam` input MeasurementSets for each beam, including
    # the calibrator.  band_size lists the number of subbands per band.
    sbs_per_beam = sum(input_parset.getIntVector("band_size"))

    print "Locating calibrator data and checking paths"
    ms_cal = {}
    # One calibrator MS per subband, listed in an external text file.
    ms_cal["datafiles"] = read_ms_list(input_parset.getString("cal_ms_list"))
    assert(len(ms_cal["datafiles"]) == sbs_per_beam)
    # Calibrator products are written to <output_dir>/calibrator/<cal_obsid>.
    ms_cal["output_dir"] = os.path.join(
        input_parset.getString("output_dir"),
        "calibrator",
        input_parset.getString("cal_obsid")
    )
    make_directory(ms_cal["output_dir"])
    print "Copying calibrator subbands to output"
    # NOTE(review): calibrator data is copied to the *output* area rather than
    # scratch — presumably the calibrated calibrator MSs are themselves a
    # data product; confirm against the pipeline's output conventions.
    ms_cal["datafiles"] = copy_to_work_area(ms_cal["datafiles"], ms_cal["output_dir"])

    print "Locating target data and checking paths"
    # ms_target will be a dict that provides all the information we need to
    # process each independent element of the observation, where an "element"
    # is a combination of a beam (SAP) and a band (number of subbands)
    ms_target = {}
    target_mss = read_ms_list(input_parset.getString("target_ms_list"))
    assert(len(target_mss) == input_parset.getInt("n_beams") * sbs_per_beam)
# Partition the flat target MS list into per-beam groups of sbs_per_beam
# subbands: zip(*[iter(x)]*n) is the standard n-at-a-time grouper, so `data`
# holds exactly the subbands belonging to this beam (SAP).
for beam, data in enumerate(zip(*[iter(target_mss)] * sbs_per_beam)):
    # Index of the first subband of the current band *within this beam*.
    start_sb = 0
    for band, band_size in enumerate(input_parset.getIntVector("band_size")):
        target_info = {}
        # BUG FIX: slice this beam's own group (`data`), not the full
        # target_mss list.  start_sb restarts at 0 for every beam, so
        # indexing target_mss handed every beam the first beam's subbands
        # (and `data` was never used at all).
        target_info['datafiles'] = list(data[start_sb:start_sb + band_size])
        # The calibrator holds a single beam's worth of subbands, so the
        # same per-beam offsets index it directly.
        target_info['calfiles'] = ms_cal["datafiles"][start_sb:start_sb + band_size]
        assert(len(target_info['datafiles']) == len(target_info['calfiles']))
        target_info['output_dir'] = os.path.join(
            input_parset.getString("output_dir"),
            "target",
            input_parset.getString("target_obsid"),
            "SAP00%d" % (beam,)
        )
        make_directory(target_info["output_dir"])
        # Output MS and image must not already exist: never clobber results.
        target_info["output_ms"] = os.path.join(
            target_info["output_dir"],
            "%s_SAP00%d_band%d.MS" % (input_parset.getString("target_obsid"), beam, band)
        )
        assert(not os.path.exists(target_info["output_ms"]))
        target_info["output_im"] = os.path.join(
            target_info["output_dir"],
            "%s_SAP00%d_band%d.img" % (input_parset.getString("target_obsid"), beam, band)
        )
        assert(not os.path.exists(target_info["output_im"]))
        # Pointing centre of this beam (degrees), read from the FIELD
        # subtable of the first subband; selects the matching skymodel.
        # (List comprehension instead of map() so pointing[0]/pointing[1]
        # also work under Python 3, where map() returns an iterator.)
        pointing = [
            math.degrees(x)
            for x in table("%s::FIELD" % target_info["datafiles"][0]).getcol("REFERENCE_DIR")[0][0]
        ]
        target_info["skymodel"] = os.path.join(
            input_parset.getString("skymodel_dir"),
            "%.2f_%.2f.skymodel" % (pointing[0], pointing[1])
        )
        assert(os.path.exists(target_info["skymodel"]))
        ms_target["SAP00%d_band%d" % (beam, band)] = target_info
        start_sb += band_size
# Copy the target data to the node-local scratch area: all heavy temporary
# I/O happens there rather than on shared storage.
# (iterkeys() is Python-2-only; iterating items() works on both 2 and 3,
# and print(...) is valid in both.)
for name, target_info in ms_target.items():
    print("Copying %s to scratch area" % (name,))
    target_info["datafiles"] = copy_to_work_area(target_info["datafiles"], scratch)

# We'll run as many simultaneous jobs as we have CPUs.
pool = ThreadPool(cpu_count())

# Calibration of each calibrator subband.
os.chdir(ms_cal['output_dir'])  # calibrate-stand-alone logs land in the CWD
clear_calibrate_stand_alone_logs()
calcal_parset = get_parset_subset(input_parset, "calcal.parset", scratch)
def calibrate_calibrator(cal):
    """Gain-calibrate one calibrator subband `cal` (a MeasurementSet path).

    The sky model is chosen by reading the LOFAR_TARGET source name from
    the MS's OBSERVATION subtable and looking up
    <skymodel_dir>/<sourcename>.skymodel.  replace_parmdb/replace_sourcedb
    ask run_calibrate_standalone to overwrite any existing databases.
    """
    # Normalize the catalogued name, e.g. "3C 196" -> "3c196".
    source = table("%s::OBSERVATION" % (cal,)).getcol("LOFAR_TARGET")['array'][0].lower().replace(' ', '')
    skymodel = os.path.join(
        input_parset.getString("skymodel_dir"),
        "%s.skymodel" % (source,)
    )
    # print(...) instead of the Python-2-only print statement.
    print("Calibrating %s with skymodel %s" % (cal, skymodel))
    run_calibrate_standalone(
        calcal_parset, cal, skymodel,
        replace_parmdb=True, replace_sourcedb=True
    )
with time_code("Calibration of calibrator"):
    pool.map(calibrate_calibrator, ms_cal["datafiles"])

# Clip outliers from the calibrator parmdbs.
def clip_parmdb(sb):
    """Sigma-clip the instrument parmdb of calibrator subband `sb`."""
    run_process(
        input_parset.getString("pdbclip.executable"),
        "--auto",
        "--sigma=%f" % (input_parset.getFloat("pdbclip.sigma"),),
        os.path.join(sb, "instrument")
    )

with time_code("Clip calibrator instrument databases"):
    # clip_parmdb is already a one-argument callable; the original wrapped
    # it in a redundant lambda.
    pool.map(clip_parmdb, ms_cal["datafiles"])

# Transfer calibration solutions to targets.
transfer_parset = get_parset_subset(input_parset, "transfer.parset", scratch)
transfer_skymodel = input_parset.getString("transfer.skymodel")
clear_calibrate_stand_alone_logs()
def transfer_calibration(ms_pair):
    """Apply a calibrator subband's gain solutions to its target subband.

    `ms_pair` is a (calibrator_ms, target_ms) tuple.  The calibrator's
    instrument parmdb is exported with parmexportcal into a fresh temporary
    parmdb on scratch, then applied to the target via calibrate-stand-alone.
    """
    cal, target = ms_pair
    print("Transferring solution from %s to %s" % (cal, target))
    parmdb_name = mkdtemp(dir=scratch)
    run_process("parmexportcal", "in=%s/instrument/" % (cal,), "out=%s" % (parmdb_name,))
    run_process(
        "calibrate-stand-alone", "--parmdb", parmdb_name,
        target, transfer_parset, transfer_skymodel
    )

with time_code("Transfer of calibration solutions"):
    # itervalues() is Python-2-only; values() works on both 2 and 3.
    for target in ms_target.values():
        pool.map(transfer_calibration, zip(target["calfiles"], target["datafiles"]))
# Concatenate each band's subbands into a single MS with NDPPP.
def combine_ms(target_info):
    """Combine one band's subbands into a single MS on scratch.

    The resulting path is recorded in target_info["combined_ms"].
    """
    combined = os.path.join(mkdtemp(dir=scratch), "combined.MS")
    ndppp_parset = get_parset_subset(input_parset, "combine.parset", scratch)
    overrides = {
        "msin": str(target_info["datafiles"]),
        "msout": combined
    }
    run_ndppp(ndppp_parset, overrides)
    target_info["combined_ms"] = combined

with time_code("Combining target subbands"):
    pool.map(combine_ms, ms_target.values())
# Phase-only calibration of the combined target subbands.
print("Running phase only calibration")

def phaseonly(target_info):
    """Phase-only calibrate target_info["combined_ms"] against its skymodel.

    We chdir to the scratch directory initially, so that logs get dumped
    there, then copy the logs to the output directory when we're done.
    Any failure is reported and re-raised so the pool surfaces it.
    """
    try:
        os.chdir(os.path.dirname(target_info["combined_ms"]))
        run_calibrate_standalone(
            get_parset_subset(input_parset, "phaseonly.parset", scratch),
            target_info["combined_ms"],
            target_info["skymodel"]
        )
        for logfile in glob.glob(
            os.path.join(os.path.dirname(target_info["combined_ms"]), "*log")
        ):
            shutil.copy(logfile, target_info["output_dir"])
    except Exception as e:
        # "except Exception, e" was Python-2-only syntax; "as" is valid on
        # Python 2.6+ and Python 3.
        print("Error in phaseonly with %s" % (target_info["combined_ms"],))
        print(str(e))
        raise

# Most Lisa nodes have 24 GB RAM -- we don't want to run out, so use a
# smaller pool than the CPU count for this memory-hungry step.
calpool = ThreadPool(6)
with time_code("Phase-only calibration"):
    calpool.map(phaseonly, ms_target.values())
# Strip bad stations.
# The combined, calibrated, stripped MS is one of our output data products,
# so it is written under the name specified in the parset.
def strip_bad_stations(target_info):
    """Write the combined MS, minus misbehaving stations, to output_ms."""
    combined = target_info["combined_ms"]
    flagged_stations = find_bad_stations(combined, scratch)
    strip_stations(combined, target_info["output_ms"], flagged_stations)

with time_code("Strip bad stations"):
    pool.map(strip_bad_stations, ms_target.values())
# Limit the length of the baselines we're using: imaging will run on a
# reference table holding only the short baselines.
maxbl = input_parset.getFloat("limit.max_baseline")

def limit_bl(target_info):
    """Make a baseline-truncated copy of output_ms on scratch (bl_limit_ms)."""
    truncated_ms = mkdtemp(dir=scratch)
    target_info["bl_limit_ms"] = truncated_ms
    limit_baselines(target_info["output_ms"], truncated_ms, maxbl)

with time_code("Limiting maximum baseline length"):
    pool.map(limit_bl, ms_target.values())
# We source a special build for using the "new" awimager.
awim_init = input_parset.getString("awimager.initscript")

# Calculate the threshold for cleaning based on the noise in a dirty map.
# We don't use our thread pool here, since awimager is parallelized itself.
noise_parset_name = get_parset_subset(input_parset, "noise.parset", scratch)
with time_code("Calculating threshold for cleaning"):
    for target_info in ms_target.values():
        # print(...) instead of the Python-2-only print statement.
        print("Getting threshold for %s" % target_info["output_ms"])
        # Clean threshold = noise.multiplier * estimated noise of a dirty map.
        target_info["threshold"] = input_parset.getFloat("noise.multiplier") * estimate_noise(
            target_info["bl_limit_ms"],
            noise_parset_name,
            maxbl,
            input_parset.getFloat("noise.box_size"),
            scratch
        )
        print("Threshold for %s is %f Jy" % (target_info["output_ms"], target_info["threshold"]))
# Make a clean mask per band from its skymodel.
aw_parset_name = get_parset_subset(input_parset, "image.parset", scratch)
with time_code("Making mask"):
    for target_info in ms_target.values():
        # print(...) instead of the Python-2-only print statement.
        print("Making mask for %s" % target_info["output_ms"])
        target_info["mask"] = make_mask(
            target_info["bl_limit_ms"],
            aw_parset_name,
            target_info["skymodel"],
            input_parset.getString("make_mask.executable"),
            scratch,
            awim_init=awim_init
        )
with time_code("Making images"):
    for target_info in ms_target.values():
        print("Making image %s" % target_info["output_im"])
        # awimager cleans down to the per-band threshold inside the mask;
        # echo its output into the pipeline log.
        print(run_awimager(
            aw_parset_name,
            {
                "ms": target_info["bl_limit_ms"],
                "mask": target_info["mask"],
                "threshold": "%fJy" % (target_info["threshold"],),
                "image": target_info["output_im"],
                "wmax": maxbl
            },
            initscript=awim_init
        ))
        # Typo fix: "Updaging" -> "Updating".
        print("Updating metadata in %s" % target_info["output_im"])
        run_process(
            "addImagingInfo",
            "%s.restored.corr" % target_info["output_im"],
            "",  # No sky model specified
            "0",
            str(maxbl),
            target_info["output_ms"]
        )
        # Preserve the clean mask next to the image as a data product.
        print("Saving mask for %s to %s" % (target_info["output_im"], target_info["output_im"] + ".mask"))
        shutil.copytree(target_info["mask"], target_info["output_im"] + ".mask")