def state_validates(config=None, inmeta=None, sdmfile=None, sdmscan=None,
                    bdfdir=None, preffile=None, prefsname=None, inprefs={}):
    """ Try to compile state """

    import sys
    from rfpipe import state

    try:
        st = state.State(inmeta=inmeta, config=config, preffile=preffile,
                         inprefs=inprefs, name=prefsname, sdmfile=sdmfile,
                         sdmscan=sdmscan, bdfdir=bdfdir, showsummary=False,
                         validate=True)
        return True
    except Exception:
        import traceback
        traceback.print_tb(sys.exc_info()[2])
        logger.warning("State did not validate")
        return False
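
# Illustrative usage sketch for state_validates. The SDM path, scan number,
# and preffile name below are hypothetical placeholders, not files shipped
# with this repo.
def example_state_validates():
    """ Check that a candidate configuration compiles before searching. """
    ok = state_validates(sdmfile='/path/to/sdmfile', sdmscan=1,
                         preffile='realfast.yml', prefsname='NRAOdefaultL')
    if not ok:
        logger.warning("Preferences/metadata do not compile; fix before "
                       "submitting the scan.")
    return ok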
def pipeline_sdm(sdm, inprefs=None, intent='TARGET', preffile=None):
    """ Get scans from SDM and run search.
    intent can be partial match to any of scan intents.
    """

    from rfpipe import state, metadata

    scans = list(metadata.getsdm(sdm).scans())
    intents = [scan.intents for scan in scans]
    logger.info("Found {0} scans of intents {1} in {2}".format(len(scans),
                                                               intents, sdm))

    scannums = [int(scan.idx) for scan in scans
                if scan.bdf.exists
                and any([intent in scint for scint in scan.intents])]
    logger.info("Searching {0} of {1} scans".format(len(scannums),
                                                    len(scans)))

    ccs = []
    for scannum in scannums:
        st = state.State(sdmfile=sdm, sdmscan=scannum, inprefs=inprefs,
                         preffile=preffile)
        ccs.append(pipeline_scan(st))

    return ccs
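
# Illustrative usage sketch for pipeline_sdm. The SDM path and preference
# overrides are hypothetical placeholders.
def example_pipeline_sdm():
    """ Search all TARGET scans of one SDM and count candidates. """
    ccs = pipeline_sdm('/path/to/sdmfile', intent='TARGET',
                       inprefs={'workdir': '.'})
    ncands = sum(len(cc) for cc in ccs)
    logger.info("Found {0} candidates over {1} scans".format(ncands,
                                                             len(ccs)))
    return ccs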
def set_state(self, scanId, config=None, inmeta=None, sdmfile=None,
              sdmscan=None, bdfdir=None, prefsname=None):
    """ Given metadata source, define state for a scanId.
    """

    # TODO: define prefsname according to config and/or heuristics
    prefs = preferences.Preferences(**preferences.parsepreffile(
        self.preffile, name=prefsname, inprefs=self.inprefs))

    st = state.State(inmeta=inmeta, config=config, inprefs=prefs,
                     lock=self.lock, sdmfile=sdmfile, sdmscan=sdmscan,
                     bdfdir=bdfdir)

    logger.info('State set for scanId {0}. Requires {1:.1f} GB read and'
                ' {2:.1f} GPU-sec to search.'
                .format(st.metadata.scanId,
                        heuristics.total_memory_read(st),
                        heuristics.total_compute_time(st)))

    self.states[scanId] = st
def oldcands_readone(candsfile, scan):
    """ For old-style merged candidate file, create new state and candidate
    dataframe for a given scan.
    Requires sdm locally with bdf for given scan.
    """

    with open(candsfile, 'rb') as pkl:
        d = pickle.load(pkl)
        loc, prop = pickle.load(pkl)

    inprefs = preferences.oldstate_preferences(d, scan=scan)
    inprefs.pop('gainfile')
    sdmfile = os.path.basename(d['filename'])

    if os.path.exists(sdmfile):
        logger.info('Parsing metadata from sdmfile {0}'.format(sdmfile))
        st = state.State(sdmfile=sdmfile, sdmscan=scan, inprefs=inprefs)
    else:
        logger.info('Parsing metadata from cands file')
        meta = metadata.oldstate_metadata(d, scan=scan)
        st = state.State(inmeta=meta, inprefs=inprefs, showsummary=False)

    st.rtpipe_version = float(d['rtpipe_version'])
    if st.rtpipe_version <= 1.54:
        logger.info('Candidates detected with rtpipe version {0}. All '
                    'versions <=1.54 used an incorrect DM scaling prefactor.'
                    .format(st.rtpipe_version))

    colnames = d['featureind']
    logger.info('Calculating candidate properties for scan {0}'.format(scan))
    df = pd.DataFrame(OrderedDict(zip(colnames, loc.transpose())))
    df2 = pd.DataFrame(OrderedDict(zip(st.features, prop.transpose())))
    df3 = pd.concat([df, df2], axis=1)[df.scan == scan]
    df3.metadata = st.metadata
    df3.prefs = st.prefs

    return st, df3
def prepare_data(sdmfile, gainfile, delta_l, delta_m, segment=0, dm=0, dt=1,
                 spws=None):
    """ Applies calibration, flagging, dedispersion and other data
    preparation steps from rfpipe. Then phaseshifts the data to the location
    of the candidate.
    """

    st = state.State(sdmfile=sdmfile, sdmscan=1,
                     inprefs={'gainfile': gainfile, 'workdir': '.',
                              'maxdm': 0, 'flaglist': []},
                     showsummary=False)
    if spws:
        st.prefs.spw = spws

    data = source.read_segment(st, segment)
    takepol = [st.metadata.pols_orig.index(pol) for pol in st.pols]
    takebls = [st.metadata.blarr_orig.tolist().index(list(bl))
               for bl in st.blarr]
    datap = np.require(data, requirements='W').take(takepol, axis=3).take(
        st.chans, axis=2).take(takebls, axis=1)
    datap = source.prep_standard(st, segment, datap)
    datap = calibration.apply_telcal(st, datap)
    datap = flagging.flag_data(st, datap)

    delay = calc_delay(st.freq, st.freq.max(), dm, st.inttime)
    data_dmdt = dedisperseresample(datap, delay, dt)
    print(f'shape of data_dmdt is {data_dmdt.shape}')

    uvw = get_uvw_segment(st, segment)
    phase_shift(data_dmdt, uvw=uvw, dl=delta_l, dm=delta_m)

    return data_dmdt, st
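
# Illustrative usage sketch for prepare_data. The SDM and gainfile paths and
# the candidate offsets (delta_l, delta_m) are hypothetical placeholders.
def example_prepare_data():
    """ Prepare and phaseshift one segment toward a candidate location. """
    data, st = prepare_data('/path/to/sdmfile', '/path/to/gainfile.GN',
                            delta_l=1e-4, delta_m=-2e-4, segment=0,
                            dm=57.0, dt=2)
    print(f'prepared data of shape {data.shape} for {st.metadata.scanId}')
    return data, st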
def set_state(self, scanId, config=None, inmeta=None, sdmfile=None,
              sdmscan=None, bdfdir=None, validate=True, showsummary=True):
    """ Given metadata source, define state for a scanId.
    Uses metadata to set preferences used in preffile (prefsname).
    Preferences are then overloaded with self.inprefs.
    Will inject mock transient based on mockprob and other parameters.
    """

    from rfpipe import preferences, state

    prefsname = get_prefsname(inmeta=inmeta, config=config, sdmfile=sdmfile,
                              sdmscan=sdmscan, bdfdir=bdfdir)
    inprefs = preferences.parsepreffile(self.preffile, name=prefsname,
                                        inprefs=self.inprefs)

    # alternatively, overload prefs with compiled rules (req Python >= 3.5)
    # inprefs = {**inprefs, **heuristics.band_prefs(inmeta)}

    st = state.State(inmeta=inmeta, config=config, inprefs=inprefs,
                     lock=self.lock, sdmfile=sdmfile, sdmscan=sdmscan,
                     bdfdir=bdfdir, validate=validate,
                     showsummary=showsummary)

    logger.info('State set for scanId {0}. Requires {1:.1f} GB read and'
                ' {2:.1f} GPU-sec to search.'
                .format(st.metadata.scanId,
                        heuristics.total_memory_read(st),
                        heuristics.total_compute_time(st)))

    self.states[scanId] = st
def state_validates(config=None, inmeta=None, sdmfile=None, sdmscan=None,
                    bdfdir=None, preffile=None, inprefs={}):
    """ Try to compile state """

    from rfpipe import state

    try:
        st = state.State(inmeta=inmeta, config=config, preffile=preffile,
                         inprefs=inprefs, sdmfile=sdmfile, sdmscan=sdmscan,
                         bdfdir=bdfdir, showsummary=False, validate=True)
        return True
    except AssertionError:
        return False
def refine(miniSDMlist, workdir, preffileloc,
           gainpath='/home/mchammer/evladata/telcal/', search_sigma=7,
           refine=True, classify=True, dm=350, dm_frac=0.2, dm_steps=100,
           devicenum='0', band='l'):

    os.environ['CUDA_VISIBLE_DEVICES'] = devicenum

    # Search for the gainfile of each miniSDM
    gainfile = []
    for sdm in miniSDMlist:
        sdmname = sdm.split('/')[-1]
        datasetId = '{0}'.format('_'.join(sdmname.split('_')[1:-1]))

        # set the path to the gainfile
        gainname = datasetId + '.GN'
        logger.info('Searching for the gainfile {0} in {1}'.format(gainname,
                                                                   gainpath))
        for path, dirs, files in os.walk(gainpath):
            for f in filter(lambda x: gainname in x, files):
                gainfile.append(os.path.join(path, gainname))
                break

    # Search all miniSDMs
    for index, sdm in enumerate(miniSDMlist):
        prefs = {'saveplots': True, 'savenoise': False, 'savesols': False,
                 'savecandcollection': True, 'savecanddata': True,
                 'workdir': workdir, 'gainfile': gainfile[index],
                 'sigma_image1': search_sigma,
                 'dmarr': list(np.linspace(dm - dm_frac * dm,
                                           dm + dm_frac * dm, dm_steps))}

        if band == 'l':
            name = 'NRAOdefaultL'
        elif band == 's':
            name = 'NRAOdefaultS'
        else:
            logger.error("Only L and S band supported")
            return

        try:
            st = state.State(sdmfile=sdm, sdmscan=1, inprefs=prefs,
                             preffile=preffileloc, name=name)
            cc = pipeline.pipeline_scan(st)
        except AssertionError:
            logger.exception('BDF not found for {0}, moving on to next '
                             'sdm'.format(sdm))

    # Classify the generated pickles using FETCH and generate refinement
    # plots
    for miniSDM in miniSDMlist:
        sdmname = miniSDM.split('/')[-1]
        for pkl in glob.glob(st.prefs.workdir + '/' + 'cands_*' +
                             sdmname.split('/')[0] + '*.pkl'):
            if classify or refine:
                logger.info('Refining and classifying pkl: {0}'.format(pkl))
                ccs = list(candidates.iter_cands(pkl,
                                                 select='candcollection'))
                for cc in ccs:
                    cds = cc.canddata
                    if cds:
                        for cd in cds:
                            if classify:
                                payload = candidates.cd_to_fetch(
                                    cd, classify=True, save_png=True,
                                    show=True, mode='GPU', outdir=workdir,
                                    devicenum='0')
                                logger.info('FETCH FRB Probability of the '
                                            'candidate {0} is {1}'
                                            .format(cd.candid, payload))
                            if refine:
                                logger.info('Generating Refinement plots')
                                cd_refine(cd, save=True, outdir=workdir)
                    else:
                        logger.info('No candidate was found in cc: '
                                    '{0}'.format(cc))
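
# Illustrative usage sketch for refine. The miniSDM paths, workdir, and
# preffile location are hypothetical placeholders.
def example_refine():
    """ Search, classify, and plot candidates from a list of miniSDMs. """
    refine(['/path/to/miniSDM1', '/path/to/miniSDM2'],
           workdir='/path/to/workdir', preffileloc='realfast.yml',
           search_sigma=7, dm=350, dm_frac=0.2, band='l')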
def oldcands_readone(candsfile, scan=None):
    """ Reads old-style candidate files to create new state and candidate
    collection for a given scan.
    Parsing merged cands file requires sdm locally with bdf for given scan.
    If no scan provided, assumes candsfile is from single scan not merged.
    """

    from rfpipe import preferences, metadata, state, candidates

    with open(candsfile, 'rb') as pkl:
        try:
            d = pickle.load(pkl)
            ret = pickle.load(pkl)
        except UnicodeDecodeError:
            d = pickle.load(pkl, encoding='latin-1')
            ret = pickle.load(pkl, encoding='latin-1')

    if isinstance(ret, tuple):
        loc, prop = ret
    elif isinstance(ret, dict):
        loc = np.array(list(ret.keys()))
        prop = np.array(list(ret.values()))
    else:
        logger.warning("Not sure what we've got in this here cands pkl "
                       "file...")
        return None, None  # loc/prop undefined; cannot proceed

    # detect merged vs nonmerged
    if 'scan' in d['featureind']:
        locind0 = 1
    else:
        locind0 = 0

    # merged candsfiles must be called with scan arg
    if scan is None:
        assert locind0 == 0, "Set scan if candsfile has multiple scans."

    inprefs = preferences.oldstate_preferences(d, scan=scan)
    inprefs.pop('gainfile')
    inprefs.pop('workdir')
    inprefs.pop('fileroot')
    sdmfile = os.path.basename(d['filename'])

    try:
        assert scan is not None
        st = state.State(sdmfile=sdmfile, sdmscan=scan, inprefs=inprefs)
    except Exception:
        meta = metadata.oldstate_metadata(d, scan=scan)
        st = state.State(inmeta=meta, inprefs=inprefs, showsummary=False)

    if 'rtpipe_version' in d:
        st.rtpipe_version = float(d['rtpipe_version'])  # TODO test this
        if st.rtpipe_version <= 1.54:
            logger.info('Candidates detected with rtpipe version {0}. All '
                        'versions <=1.54 used incorrect DM scaling.'
                        .format(st.rtpipe_version))

    if scan is None:
        scan = d['scan']

    logger.info('Calculating candidate properties for scan {0}'.format(scan))
    if locind0 == 1:
        loc = loc[np.where(loc[:, 0] == scan)][:, locind0:]

    logger.debug('Features: {0}; searchtype: {1}'.format(
        st.features, st.prefs.searchtype))

    fields = [str(ff) for ff in st.search_dimensions + st.features]
    types = [str(tt) for tt in len(st.search_dimensions) * ['<i4'] +
             len(st.features) * ['<f4']]
    dtype = np.dtype({'names': fields, 'formats': types})
    features = np.zeros(len(loc), dtype=dtype)
    for i in range(len(loc)):
        features[i] = tuple(list(loc[i]) + list(prop[i]))
    cc = candidates.CandCollection(features, st.prefs, st.metadata)

    return st, cc
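
# Illustrative usage sketch for oldcands_readone. The cands pickle path and
# scan number are hypothetical placeholders.
def example_oldcands_readone():
    """ Load one scan from an old-style cands file into a CandCollection. """
    st, cc = oldcands_readone('/path/to/cands_merged.pkl', scan=7)
    logger.info("Read {0} candidates for scanId {1}".format(
        len(cc), st.metadata.scanId))
    return st, cc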
def refine_sdm(sdmname, dm, preffile='realfast.yml',
               gainpath='/home/mchammer/evladata/telcal/', npix_max=None,
               npix_max_orig=None, search_sigma=7, ddm=100, refine=True,
               classify=True, devicenum=None, workdir=None, inprefs=None):
    """ Given candId, look for SDM in portal, then run refinement.
    Assumes this is running on rfnode with CBE lustre.
    npix_max_orig sets the npix_max of the original detection.
    ddm sets +- of dm grid to search.
    """

    from rfpipe import metadata, state, pipeline, candidates, util

    if devicenum is None:
        try:
            from distributed import get_worker
            name = get_worker().name
            devicenum = int(name.split('g')[1])
        except ValueError:
            devicenum = 0

    # Search for the gainfile
    datasetId = '{0}'.format('_'.join(
        os.path.basename(sdmname).split('_')[1:-1]))
    gainname = datasetId + '.GN'
    logger.info('Searching for the gainfile {0} in {1}'.format(gainname,
                                                               gainpath))
    for path, dirs, files in os.walk(gainpath):
        for f in filter(lambda x: gainname in x, files):
            gainfile = os.path.join(path, gainname)
            break

    # Set search preferences
    if inprefs:
        prefs = inprefs
    else:
        prefs = {'saveplots': False, 'savenoise': False, 'savesols': False,
                 'savecandcollection': False, 'savecanddata': True,
                 'dm_maxloss': 0.01, 'npix_max': npix_max}

    prefs['gainfile'] = gainfile
    prefs['workdir'] = workdir
    prefs['sigma_image1'] = search_sigma
    prefs['maxdm'] = dm + ddm

    bdfdir = metadata.get_bdfdir(sdmfile=sdmname, sdmscan=1)
    band = metadata.sdmband(sdmfile=sdmname, sdmscan=1, bdfdir=bdfdir)

    cc = None
    try:
        st = state.State(sdmfile=sdmname, sdmscan=1, inprefs=prefs,
                         preffile=preffile, name='NRAOdefault' + band,
                         showsummary=False, bdfdir=bdfdir)
    except AssertionError:
        try:
            logger.warning("Could not generate state with full image. "
                           "Trying with npix_max at 2x original image "
                           "size...")
            prefs['npix_max'] = min(npix_max, 2 * npix_max_orig)
            st = state.State(sdmfile=sdmname, sdmscan=1, inprefs=prefs,
                             preffile=preffile, name='NRAOdefault' + band,
                             bdfdir=bdfdir, showsummary=False)
        except AssertionError:
            # could be state can't be defined
            logger.warning("Could not generate state with 2x images. "
                           "Trying with original image size...")
            prefs['npix_max'] = min(npix_max, npix_max_orig)
            st = state.State(sdmfile=sdmname, sdmscan=1, inprefs=prefs,
                             preffile=preffile, name='NRAOdefault' + band,
                             bdfdir=bdfdir, showsummary=False)
    except FileNotFoundError as e:
        logger.warning("{0}".format(e))
        return cc

    # remove superfluous dms, enforce orig dm
    st.prefs.dmarr = sorted([dm] + [dm0 for dm0 in st.dmarr
                                    if (dm0 == 0 or dm0 > dm - ddm)])
    st.clearcache()
    st.summarize()

    ccs = pipeline.pipeline_scan(st, devicenum=devicenum)
    cc = sum(ccs) if len(ccs) else ccs

    # Classify the strongest candidate with FETCH and generate refinement
    # plots
    if len(cc):
        maxind = np.where(cc.snrtot == cc.snrtot.max())[0]
        assert len(maxind) == 1
        cd = cc[maxind[0]].canddata[0]
        assert isinstance(cd, candidates.CandData)

        if classify:
            frbprob = candidates.cd_to_fetch(cd, classify=True,
                                             devicenum=devicenum, mode='CPU')
            logger.info('FETCH FRB Probability of the candidate {0} is '
                        '{1}'.format(cd.candid, frbprob))
        else:
            frbprob = None

        if refine:
            logger.info('Generating Refinement plots')
            cd_refined_plot(cd, devicenum, frbprob=frbprob)
    else:
        if prefs['npix_max'] != npix_max_orig:
            logger.info('No candidate was found in first search. Trying '
                        'again with original image size.')
            prefs['npix_max'] = npix_max_orig
            st = state.State(sdmfile=sdmname, sdmscan=1, inprefs=prefs,
                             preffile=preffile, name='NRAOdefault' + band,
                             bdfdir=bdfdir, showsummary=False)

            # remove superfluous dms, enforce orig dm
            st.prefs.dmarr = sorted([dm] + [dm0 for dm0 in st.dmarr
                                            if (dm0 == 0 or dm0 > dm - ddm)])
            st.clearcache()
            st.summarize()

            ccs = pipeline.pipeline_scan(st, devicenum=devicenum)
            cc = sum(ccs) if len(ccs) else ccs

            if len(cc):
                maxind = np.where(cc.snrtot == cc.snrtot.max())[0]
                assert len(maxind) == 1
                cd = cc[maxind[0]].canddata[0]
                assert isinstance(cd, candidates.CandData)

                if classify:
                    frbprob = candidates.cd_to_fetch(cd, classify=True,
                                                     mode='CPU')
                    logger.info('FETCH FRB Probability of the candidate '
                                '{0} is {1}'.format(cd.candid, frbprob))
                else:
                    frbprob = None

                if refine:
                    logger.info('Generating Refinement plots')
                    cd_refined_plot(cd, devicenum, frbprob=frbprob)
            else:
                logger.info('No candidate was found in search at original '
                            'image size. Giving up.')

    return cc
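
# Illustrative usage sketch for refine_sdm. The SDM path, DM, and image-size
# values are hypothetical placeholders.
def example_refine_sdm():
    """ Re-search one SDM around a detection DM of 350 pc/cm3. """
    cc = refine_sdm('/path/to/sdmfile', dm=350., preffile='realfast.yml',
                    npix_max=8192, npix_max_orig=2048, ddm=100,
                    workdir='/path/to/workdir')
    return cc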
    logger.info("Found {0} scans of intents {1} in {2}".format(
        len(scans), intents, sdmname))

    intent = 'TARGET'
    scannums = [int(scan.idx) for scan in scans
                if scan.bdf.exists
                and any([intent in scint for scint in scan.intents])]
    logger.info("Found {0} scans with intent {1} of total {2} scans".format(
        len(scannums), intent, len(scans)))

    scannum = random.choice(scannums)
    band = metadata.sdmband(sdmfile=sdmname, sdmscan=scannum)

    try:
        st = state.State(sdmfile=sdmname, sdmscan=scannum, preffile=preffile,
                         name='NRAOdefault' + band, showsummary=False)
    except ValueError:
        prefs = {'spw': None}
        st = state.State(sdmfile=sdmname, sdmscan=scannum, preffile=preffile,
                         name='NRAOdefault' + band, showsummary=False,
                         inprefs=prefs)

    st.prefs.gainfile = gainfile
    # fetch_data_dir + datasetId
    st.prefs.workdir = '/hyrule/data/users/kshitij/fetchrf/sim_frbs/'
    logger.info('Working directory set to {0}'.format(st.prefs.workdir))
def inject_one(preffile, devicenum, outdir):
    """ Script to inject one simulated FRB on simulated data and save
    unclustered candidates.

    :param preffile: Preference file with search preferences
    :param devicenum: GPU device number
    :param outdir: Output directory
    :return:
    """

    configs = ["A", "B", "C", "D"]
    bands = ["L", "S", "X", "C"]
    config = configs[np.random.randint(len(configs))]
    band = bands[np.random.randint(len(bands))]

    t0 = time.Time.now().mjd
    meta = metadata.mock_metadata(t0, t0 + 10 / (24 * 3600), 20, 11,
                                  32 * 4 * 2, 2, 5e3, scan=1,
                                  datasource="sim", antconfig=config,
                                  band=band)

    dataset = meta["datasetId"] + "_config_" + config + "_band_" + band
    workdir = outdir + "/" + dataset
    try:
        os.mkdir(workdir)
    except FileExistsError:
        logging.info("Directory {0} exists, using it.".format(workdir))
    except OSError:
        logging.info("Can't create directory {0}".format(workdir))
    else:
        logging.info("Created directory {0}".format(workdir))

    prefs = {}
    prefs["workdir"] = workdir
    prefs["savenoise"] = False
    prefs["fftmode"] = "fftw"
    prefs["nthread"] = 10
    prefs["flaglist"] = [("badchtslide", 4.0, 20),
                         ("badchtslide", 4, 20),
                         ("badspw", 4.0),
                         ("blstd", 3.5, 0.008)]

    st = state.State(inmeta=meta, showsummary=False, preffile=preffile,
                     name="NRAOdefault" + band, inprefs=prefs)

    segment = 0
    data = source.read_segment(st, segment)
    dmind = None
    dtind = None
    snr = np.random.uniform(low=10, high=40)
    mock = util.make_transient_params(st, snr=snr, segment=segment,
                                      data=data, ntr=1, lm=-1, dmind=dmind,
                                      dtind=dtind)

    st.clearcache()
    st.prefs.simulated_transient = mock
    cc = pipeline.pipeline_seg(st=st, segment=segment, devicenum=devicenum)

    if not len(cc):
        logging.info("No candidate found. Deleting the empty pickle, and "
                     "trying with a higher SNR now.")
        pkl = glob.glob(cc.state.prefs.workdir + "/*pkl")[0]
        try:
            os.remove(pkl)
        except OSError:
            pass

        snr = snr + 5
        mock = util.make_transient_params(st, snr=snr, segment=segment,
                                          data=data, ntr=1, lm=-1,
                                          dmind=dmind, dtind=dtind)
        st.clearcache()
        st.prefs.simulated_transient = mock
        cc = pipeline.pipeline_seg(st=st, segment=segment,
                                   devicenum=devicenum)
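
# Illustrative usage sketch for inject_one. The preffile and output
# directory are hypothetical placeholders; devicenum selects the GPU used
# for the search.
def example_inject_one():
    """ Inject one mock FRB into simulated data and search for it. """
    inject_one('realfast.yml', devicenum=0, outdir='/path/to/outdir')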