Example #1
0
def state_validates(config=None,
                    inmeta=None,
                    sdmfile=None,
                    sdmscan=None,
                    bdfdir=None,
                    preffile=None,
                    prefsname=None,
                    inprefs=None):
    """ Try to compile a State from the given metadata source.

    Returns True if a valid State could be constructed, False otherwise.
    A failed validation logs the traceback before returning False.
    """

    from rfpipe import state

    # Avoid the shared-mutable-default-argument pitfall
    if inprefs is None:
        inprefs = {}

    try:
        st = state.State(inmeta=inmeta,
                         config=config,
                         preffile=preffile,
                         inprefs=inprefs,
                         name=prefsname,
                         sdmfile=sdmfile,
                         sdmscan=sdmscan,
                         bdfdir=bdfdir,
                         showsummary=False,
                         validate=True)
        return True
    except Exception:
        # Was a bare "except:" that referenced sys without importing it;
        # import locally and use the non-deprecated logger.warning.
        import sys
        import traceback
        traceback.print_tb(sys.exc_info()[2])
        logger.warning("State did not validate")
        return False
Example #2
0
def pipeline_sdm(sdm, inprefs=None, intent='TARGET', preffile=None):
    """ Get scans from SDM and run search.
    intent can be partial match to any of scan intents.

    Returns
    -------
    list
        One candidate collection (output of pipeline_scan) per searched scan.
    """

    from rfpipe import state, metadata

    scans = list(metadata.getsdm(sdm).scans())
    intents = [scan.intents for scan in scans]
    logger.info("Found {0} scans of intents {1} in {2}".format(
        len(scans), intents, sdm))

    # Keep scans whose BDF exists and whose intents (partially) match
    scannums = [
        int(scan.idx) for scan in scans
        if scan.bdf.exists and any([intent in scint for scint in scan.intents])
    ]
    logger.info("Searching {0} of {1} scans".format(len(scannums), len(scans)))

    ccs = []
    for scannum in scannums:
        st = state.State(sdmfile=sdm,
                         sdmscan=scannum,
                         inprefs=inprefs,
                         preffile=preffile)
        ccs.append(pipeline_scan(st))

    # Bug fix: results were accumulated but never returned
    return ccs
Example #3
0
    def set_state(self,
                  scanId,
                  config=None,
                  inmeta=None,
                  sdmfile=None,
                  sdmscan=None,
                  bdfdir=None,
                  prefsname=None):
        """ Define and cache a State for scanId from a metadata source
        (config, inmeta, or sdmfile/sdmscan), using preferences parsed
        from self.preffile overloaded with self.inprefs.
        """

        # TODO: define prefsname according to config and/or heuristics
        parsed = preferences.parsepreffile(self.preffile,
                                           name=prefsname,
                                           inprefs=self.inprefs)
        prefs = preferences.Preferences(**parsed)

        newstate = state.State(inmeta=inmeta,
                               config=config,
                               inprefs=prefs,
                               lock=self.lock,
                               sdmfile=sdmfile,
                               sdmscan=sdmscan,
                               bdfdir=bdfdir)

        logger.info('State set for scanId {0}. Requires {1:.1f} GB read and'
                    ' {2:.1f} GPU-sec to search.'.format(
                        newstate.metadata.scanId,
                        heuristics.total_memory_read(newstate),
                        heuristics.total_compute_time(newstate)))

        self.states[scanId] = newstate
Example #4
0
def oldcands_readone(candsfile, scan):
    """ For old-style merged candidate file, create new state and candidate
    dataframe for a given scan.
    Requires sdm locally with bdf for given scan.

    Parameters
    ----------
    candsfile : str
        Path to an rtpipe-era candidates pickle. The file holds two
        back-to-back pickle records: a parameter dict, then a
        (loc, prop) pair of candidate arrays.
    scan : int
        Scan number to select candidates for.

    Returns
    -------
    tuple
        (state.State, pandas.DataFrame) -- the reconstructed state and
        per-candidate properties for the requested scan.
    """

    # Two sequential pickle records: old pipeline parameter dict, then
    # candidate locations and properties.
    with open(candsfile, 'rb') as pkl:
        d = pickle.load(pkl)
        loc, prop = pickle.load(pkl)

    inprefs = preferences.oldstate_preferences(d, scan=scan)
    # Gainfile path from the old run is environment-specific; drop it
    inprefs.pop('gainfile')
    sdmfile = os.path.basename(d['filename'])
    if os.path.exists(sdmfile):
        logger.info('Parsing metadata from sdmfile {0}'.format(sdmfile))
        st = state.State(sdmfile=sdmfile, sdmscan=scan, inprefs=inprefs)
    else:
        # SDM not available locally: rebuild metadata from the cands file
        logger.info('Parsing metadata from cands file')
        meta = metadata.oldstate_metadata(d, scan=scan)
        st = state.State(inmeta=meta, inprefs=inprefs, showsummary=False)

    st.rtpipe_version = float(d['rtpipe_version'])
    if st.rtpipe_version <= 1.54:
        logger.info(
            'Candidates detected with rtpipe version {0}. All '
            'versions <=1.54 used an incorrect DM scaling prefactor.'.format(
                st.rtpipe_version))

    # Columns for loc come from the old file; columns for prop come from
    # the new state's feature list. Keep only rows of the requested scan.
    colnames = d['featureind']
    logger.info('Calculating candidate properties for scan {0}'.format(scan))
    df = pd.DataFrame(OrderedDict(zip(colnames, loc.transpose())))
    df2 = pd.DataFrame(OrderedDict(zip(st.features, prop.transpose())))
    df3 = pd.concat([df, df2], axis=1)[df.scan == scan]

    # NOTE(review): attaching attributes to a DataFrame is fragile --
    # pandas does not preserve them through most operations.
    df3.metadata = st.metadata
    df3.prefs = st.prefs

    return st, df3
def prepare_data(sdmfile,
                 gainfile,
                 delta_l,
                 delta_m,
                 segment=0,
                 dm=0,
                 dt=1,
                 spws=None):
    """ Apply calibration, flagging, dedispersion and other data
    preparation steps from rfpipe, then phaseshift the data to the
    location of the candidate.

    Parameters
    ----------
    sdmfile : str
        Path to the SDM to read (scan 1 is used).
    gainfile : str
        Telcal gain file applied during calibration.
    delta_l, delta_m : float
        Offsets used by phase_shift to move the phase center to the
        candidate location.
    segment : int
        Segment of the scan to read.
    dm : float
        Dispersion measure used to compute per-channel delays.
    dt : int
        Time resampling factor for dedisperseresample.
    spws : list or None
        Optional spectral window selection; overrides state prefs.

    Returns
    -------
    tuple
        (prepared data array, state.State used to prepare it)
    """
    st = state.State(sdmfile=sdmfile,
                     sdmscan=1,
                     inprefs={
                         'gainfile': gainfile,
                         'workdir': '.',
                         'maxdm': 0,
                         'flaglist': []
                     },
                     showsummary=False)
    if spws:
        st.prefs.spw = spws

    data = source.read_segment(st, segment)

    # Select/reorder polarizations, channels and baselines so the raw
    # segment matches the state's expectations.
    takepol = [st.metadata.pols_orig.index(pol) for pol in st.pols]
    takebls = [
        st.metadata.blarr_orig.tolist().index(list(bl)) for bl in st.blarr
    ]
    datap = np.require(data, requirements='W').take(takepol, axis=3).take(
        st.chans, axis=2).take(takebls, axis=1)
    datap = source.prep_standard(st, segment, datap)
    datap = calibration.apply_telcal(st, datap)
    datap = flagging.flag_data(st, datap)

    delay = calc_delay(st.freq, st.freq.max(), dm, st.inttime)
    data_dmdt = dedisperseresample(datap, delay, dt)

    # Bug fix: use the module logger instead of a stray debugging print
    logger.info('shape of data_dmdt is {0}'.format(data_dmdt.shape))

    uvw = get_uvw_segment(st, segment)
    # phase_shift modifies data_dmdt in place
    phase_shift(data_dmdt, uvw=uvw, dl=delta_l, dm=delta_m)

    return data_dmdt, st
Example #6
0
    def set_state(self,
                  scanId,
                  config=None,
                  inmeta=None,
                  sdmfile=None,
                  sdmscan=None,
                  bdfdir=None,
                  validate=True,
                  showsummary=True):
        """ Given metadata source, define state for a scanId.
        Uses metadata to set preferences used in preffile (prefsname).
        Preferences are then overloaded with self.inprefs.
        Will inject mock transient based on mockprob and other parameters.
        """

        from rfpipe import preferences, state

        # Derive the named preference set from whichever metadata source
        # was provided (config, inmeta, or sdmfile/sdmscan).
        prefsname = get_prefsname(inmeta=inmeta,
                                  config=config,
                                  sdmfile=sdmfile,
                                  sdmscan=sdmscan,
                                  bdfdir=bdfdir)

        # Named preffile prefs, overloaded by self.inprefs
        inprefs = preferences.parsepreffile(self.preffile,
                                            name=prefsname,
                                            inprefs=self.inprefs)

        # alternatively, overload prefs with compiled rules (req Python>= 3.5)
        #        inprefs = {**inprefs, **heuristics.band_prefs(inmeta)}

        st = state.State(inmeta=inmeta,
                         config=config,
                         inprefs=inprefs,
                         lock=self.lock,
                         sdmfile=sdmfile,
                         sdmscan=sdmscan,
                         bdfdir=bdfdir,
                         validate=validate,
                         showsummary=showsummary)

        # Log the estimated cost of searching this scan
        logger.info('State set for scanId {0}. Requires {1:.1f} GB read and'
                    ' {2:.1f} GPU-sec to search.'.format(
                        st.metadata.scanId, heuristics.total_memory_read(st),
                        heuristics.total_compute_time(st)))

        self.states[scanId] = st
Example #7
0
def state_validates(config=None,
                    inmeta=None,
                    sdmfile=None,
                    sdmscan=None,
                    bdfdir=None,
                    preffile=None,
                    inprefs=None):
    """ Try to compile a State from the given metadata source.

    Returns True if the State validates, False if validation raises
    AssertionError. Other exceptions propagate to the caller.
    """

    # Avoid the shared-mutable-default-argument pitfall
    if inprefs is None:
        inprefs = {}

    try:
        st = state.State(inmeta=inmeta,
                         config=config,
                         preffile=preffile,
                         inprefs=inprefs,
                         sdmfile=sdmfile,
                         sdmscan=sdmscan,
                         bdfdir=bdfdir,
                         showsummary=False,
                         validate=True)
        return True
    except AssertionError:
        return False
def refine(miniSDMlist,
           workdir,
           preffileloc,
           gainpath='/home/mchammer/evladata/telcal/',
           search_sigma=7,
           refine=True,
           classify=True,
           dm=350,
           dm_frac=0.2,
           dm_steps=100,
           devicenum='0',
           band='l'):
    """ Search a list of mini-SDMs over a DM grid spanning dm +/-
    dm_frac*dm, then optionally classify candidates with FETCH and
    generate refinement plots from the saved pickles.

    NOTE(review): the parameter `refine` shadows this function's own
    name inside the body; presumably intentional, but worth confirming.
    Only L and S band are supported (preffile preference-set names).
    """

    os.environ['CUDA_VISIBLE_DEVICES'] = devicenum
    # Searching for gainfile
    # NOTE(review): one gainfile path is appended per matching file found
    # under gainpath. If no telcal file exists for some SDM, gainfile and
    # miniSDMlist fall out of alignment and gainfile[index] below will be
    # wrong or raise IndexError -- confirm inputs always have a .GN file.
    gainfile = []
    for sdm in miniSDMlist:
        sdmname = sdm.split('/')[-1]
        datasetId = '{0}'.format('_'.join(sdmname.split('_')[1:-1]))
        # # set the paths to the gainfile
        gainname = datasetId + '.GN'
        logger.info('Searching for the gainfile {0} in {1}'.format(
            gainname, gainpath))
        for path, dirs, files in os.walk(gainpath):
            # NOTE(review): break exits only the inner filter loop, so the
            # os.walk continues and may append duplicates from other dirs.
            for f in filter(lambda x: gainname in x, files):
                gainfile.append(os.path.join(path, gainname))
                break

    # Searching all miniSDMs
    for index, sdm in enumerate(miniSDMlist):
        # Search preferences; dmarr is a linear grid around the input dm
        prefs = {
            'saveplots':
            True,
            'savenoise':
            False,
            'savesols':
            False,
            'savecandcollection':
            True,
            'savecanddata':
            True,
            'workdir':
            workdir,
            'gainfile':
            gainfile[index],
            'sigma_image1':
            search_sigma,
            'dmarr':
            list(np.linspace(dm - dm_frac * dm, dm + dm_frac * dm, dm_steps))
        }
        if band == 'l':
            name = 'NRAOdefaultL'
        elif band == 's':
            name = 'NRAOdefaultS'
        else:
            logger.exception("Only L and S band supported")
            return

        # A missing BDF raises AssertionError in State; skip that SDM
        try:
            st = state.State(sdmfile=sdm,
                             sdmscan=1,
                             inprefs=prefs,
                             preffile=preffileloc,
                             name=name)
            cc = pipeline.pipeline_scan(st)
        except AssertionError as e:
            logger.exception(
                'BDF not found for {0}, moving on to next sdm'.format(sdm))

    # # Classify and generate refinement plots
    # Classify the generated pickles using FETCH and generate refinement plots
    # NOTE(review): `st` here is whatever the last loop iteration set; if
    # every State raised AssertionError above, `st` is unbound -- verify.
    for miniSDM in miniSDMlist:
        sdmname = miniSDM.split('/')[-1]
        for pkl in glob.glob(st.prefs.workdir + '/' + 'cands_*' +
                             sdmname.split('/')[0] + '*.pkl'):
            if classify or refine:
                logger.info('Refining and classifying pkl: {0}'.format(pkl))
                ccs = list(candidates.iter_cands(pkl, select='candcollection'))
                for cc in ccs:
                    cds = cc.canddata
                    if cds:
                        for cd in cds:
                            if classify:
                                payload = candidates.cd_to_fetch(
                                    cd,
                                    classify=True,
                                    save_png=True,
                                    show=True,
                                    mode='GPU',
                                    outdir=workdir,
                                    devicenum='0')
                                logger.info(
                                    'FETCH FRB Probability of the candidate {0} is {1}'
                                    .format(cd.candid, payload))
                            if refine:
                                logger.info('Generating Refinement plots')
                                cd_refine(cd, save=True, outdir=workdir)
                    else:
                        logger.info(
                            'No candidate was found in cc: {0}'.format(cc))
Example #9
0
def oldcands_readone(candsfile, scan=None):
    """ Reads old-style candidate files to create new state and candidate
    collection for a given scan.
    Parsing merged cands file requires sdm locally with bdf for given scan.
    If no scan provided, assumes candsfile is from single scan not merged.

    Returns
    -------
    tuple
        (state.State, candidates.CandCollection)

    Raises
    ------
    ValueError
        If the second pickle record is neither a tuple nor a dict.
    """

    from rfpipe import preferences, metadata, state, candidates

    with open(candsfile, 'rb') as pkl:
        try:
            d = pickle.load(pkl)
            ret = pickle.load(pkl)
        except UnicodeDecodeError:
            # Python 2 pickles need explicit encoding under Python 3
            d = pickle.load(pkl, encoding='latin-1')
            ret = pickle.load(pkl, encoding='latin-1')
        if isinstance(ret, tuple):
            loc, prop = ret
        elif isinstance(ret, dict):
            loc = np.array(list(ret.keys()))
            prop = np.array(list(ret.values()))
        else:
            logger.warning(
                "Not sure what we've got in this here cands pkl file...")
            # Bug fix: previously fell through and later crashed with a
            # NameError on loc/prop; fail fast instead.
            raise ValueError("Unrecognized candidate pickle format")

    # detect merged vs nonmerged
    if 'scan' in d['featureind']:
        locind0 = 1
    else:
        locind0 = 0

    # merged candsfiles must be called with scan arg
    if scan is None:
        assert locind0 == 0, "Set scan if candsfile has multiple scans."

    inprefs = preferences.oldstate_preferences(d, scan=scan)
    # Old run-specific values that should not carry over to the new state
    inprefs.pop('gainfile')
    inprefs.pop('workdir')
    inprefs.pop('fileroot')
    sdmfile = os.path.basename(d['filename'])

    try:
        assert scan is not None
        st = state.State(sdmfile=sdmfile, sdmscan=scan, inprefs=inprefs)
    except Exception:
        # SDM not available locally (or no scan given): rebuild metadata
        # from the cands file itself. Was a bare "except:".
        meta = metadata.oldstate_metadata(d, scan=scan)
        st = state.State(inmeta=meta, inprefs=inprefs, showsummary=False)

    if 'rtpipe_version' in d:
        st.rtpipe_version = float(d['rtpipe_version'])  # TODO test this
        if st.rtpipe_version <= 1.54:
            logger.info('Candidates detected with rtpipe version {0}. All '
                        'versions <=1.54 used incorrect DM scaling.'.format(
                            st.rtpipe_version))

    if scan is None:
        assert locind0 == 0, "Set scan if candsfile has multiple scans."
        scan = d['scan']

    logger.info('Calculating candidate properties for scan {0}'.format(scan))

    # Merged file: keep rows of the requested scan and drop the scan column
    if locind0 == 1:
        loc = loc[np.where(loc[:, 0] == scan)][:, locind0:]

    # Was a stray debugging print
    logger.info('Features: {0}, searchtype: {1}'.format(
        st.features, st.prefs.searchtype))
    fields = [str(ff) for ff in st.search_dimensions + st.features]
    types = [
        str(tt) for tt in len(st.search_dimensions) * ['<i4'] +
        len(st.features) * ['<f4']
    ]
    dtype = np.dtype({'names': fields, 'formats': types})
    features = np.zeros(len(loc), dtype=dtype)
    for i in range(len(loc)):
        features[i] = tuple(list(loc[i]) + list(prop[i]))
    cc = candidates.CandCollection(features, st.prefs, st.metadata)

    return st, cc
Example #10
0
def refine_sdm(sdmname,
               dm,
               preffile='realfast.yml',
               gainpath='/home/mchammer/evladata/telcal/',
               npix_max=None,
               npix_max_orig=None,
               search_sigma=7,
               ddm=100,
               refine=True,
               classify=True,
               devicenum=None,
               workdir=None,
               inprefs=None):
    """  Given candId, look for SDM in portal, then run refinement.
    Assumes this is running on rfnode with CBE lustre.
    npix_max_orig sets the npix_max or the original detection.
    ddm sets +- of dm grid to search

    Flow: build a State (retrying with progressively smaller image sizes
    on AssertionError), search, then classify (FETCH) and/or plot the
    highest-SNR candidate. If nothing is found, retry once at the
    original image size. Returns the candidate collection (or None if
    the State could not be built due to FileNotFoundError).
    """

    from rfpipe import metadata, state, pipeline, candidates, util

    # Infer the GPU to use from the dask worker name (e.g. "...g1")
    # NOTE(review): only ValueError is caught; an unexpected worker name
    # could raise IndexError from split() instead -- confirm name format.
    if devicenum is None:
        try:
            from distributed import get_worker
            name = get_worker().name
            devicenum = int(name.split('g')[1])
        except ValueError:
            devicenum = 0

    # Searching for gainfile
    datasetId = '{0}'.format('_'.join(
        os.path.basename(sdmname).split('_')[1:-1]))
    # set the paths to the gainfile
    gainname = datasetId + '.GN'
    logging.info('Searching for the gainfile {0} in {1}'.format(
        gainname, gainpath))
    # NOTE(review): if no matching file is found, `gainfile` stays unbound
    # and the prefs assignment below raises NameError -- verify inputs.
    for path, dirs, files in os.walk(gainpath):
        for f in filter(lambda x: gainname in x, files):
            gainfile = os.path.join(path, gainname)
            break

    # Searching all miniSDMs
    if inprefs:
        prefs = inprefs
    else:
        prefs = {
            'saveplots': False,
            'savenoise': False,
            'savesols': False,
            'savecandcollection': False,
            'savecanddata': True,
            'dm_maxloss': 0.01,
            'npix_max': npix_max
        }

    prefs['gainfile'] = gainfile
    prefs['workdir'] = workdir
    prefs['sigma_image1'] = search_sigma
    prefs['maxdm'] = dm + ddm

    bdfdir = metadata.get_bdfdir(sdmfile=sdmname, sdmscan=1)
    band = metadata.sdmband(sdmfile=sdmname, sdmscan=1, bdfdir=bdfdir)
    cc = None

    # Build the State, backing off the image size on AssertionError.
    # NOTE(review): min(npix_max, ...) raises TypeError if npix_max or
    # npix_max_orig is None (both default to None) -- confirm callers
    # always pass them when the fallbacks can trigger.
    try:
        st = state.State(sdmfile=sdmname,
                         sdmscan=1,
                         inprefs=prefs,
                         preffile=preffile,
                         name='NRAOdefault' + band,
                         showsummary=False,
                         bdfdir=bdfdir)
    except AssertionError:
        try:
            logger.warning(
                "Could not generate state with full image. Trying with npix_max at 2x original image size..."
            )
            prefs['npix_max'] = min(npix_max, 2 * npix_max_orig)
            st = state.State(sdmfile=sdmname,
                             sdmscan=1,
                             inprefs=prefs,
                             preffile=preffile,
                             name='NRAOdefault' + band,
                             bdfdir=bdfdir,
                             showsummary=False)
        except AssertionError:  # could be state can't be defined
            logger.warning(
                "Could not generate state with 2x images. Trying with original image size..."
            )
            prefs['npix_max'] = min(npix_max, npix_max_orig)
            st = state.State(sdmfile=sdmname,
                             sdmscan=1,
                             inprefs=prefs,
                             preffile=preffile,
                             name='NRAOdefault' + band,
                             bdfdir=bdfdir,
                             showsummary=False)
    except FileNotFoundError as e:
        logger.warning("{0}".format(e))
        return cc

    # Trim the DM grid to [dm-ddm, dm+ddm] plus DM=0 and the input dm
    st.prefs.dmarr = sorted(
        [dm] + [dm0 for dm0 in st.dmarr if (dm0 == 0 or dm0 > dm - ddm)
                ])  # remove superfluous dms, enforce orig dm
    st.clearcache()
    st.summarize()
    ccs = pipeline.pipeline_scan(st, devicenum=devicenum)
    cc = sum(ccs) if len(ccs) else ccs

    # Classify the generated pickles using FETCH and generate refinement plots
    if len(cc):
        # Work on the single highest-SNR candidate
        maxind = np.where(cc.snrtot == cc.snrtot.max())[0]
        assert len(maxind) == 1
        cd = cc[maxind[0]].canddata[0]
        assert isinstance(cd, candidates.CandData)

        if classify:
            frbprob = candidates.cd_to_fetch(cd,
                                             classify=True,
                                             devicenum=devicenum,
                                             mode='CPU')
            logging.info(
                'FETCH FRB Probability of the candidate {0} is {1}'.format(
                    cd.candid, frbprob))
        else:
            frbprob = None

        if refine:
            logging.info('Generating Refinement plots')
            cd_refined_plot(cd, devicenum, frbprob=frbprob)
    else:
        # No detection: retry once at the original image size
        if prefs['npix_max'] != npix_max_orig:
            # NOTE(review): .format(cc) has no placeholder in this message;
            # the argument is silently ignored.
            logging.info(
                'No candidate was found in first search. Trying again with original image size.'
                .format(cc))
            prefs['npix_max'] = npix_max_orig
            st = state.State(sdmfile=sdmname,
                             sdmscan=1,
                             inprefs=prefs,
                             preffile=preffile,
                             name='NRAOdefault' + band,
                             bdfdir=bdfdir,
                             showsummary=False)

            st.prefs.dmarr = sorted(
                [dm] +
                [dm0 for dm0 in st.dmarr if (dm0 == 0 or dm0 > dm - ddm)
                 ])  # remove superfluous dms, enforce orig dm
            st.clearcache()
            st.summarize()
            ccs = pipeline.pipeline_scan(st, devicenum=devicenum)
            cc = sum(ccs) if len(ccs) else ccs
            if len(cc):
                maxind = np.where(cc.snrtot == cc.snrtot.max())[0]
                assert len(maxind) == 1
                cd = cc[maxind[0]].canddata[0]
                assert isinstance(cd, candidates.CandData)

                if classify:
                    frbprob = candidates.cd_to_fetch(cd,
                                                     classify=True,
                                                     mode='CPU')
                    logging.info(
                        'FETCH FRB Probability of the candidate {0} is {1}'.
                        format(cd.candid, frbprob))
                else:
                    frbprob = None

                if refine:
                    logging.info('Generating Refinement plots')
                    cd_refined_plot(cd, devicenum, frbprob=frbprob)
            else:
                logging.info(
                    'No candidate was found in search at original image size. Giving up.'
                )

    return cc
    len(scans), intents, sdmname))
intent = 'TARGET'
# NOTE(review): this fragment references `scans`, `sdmname`, `preffile`
# and `gainfile` defined earlier in the original script (truncated here).
# Select scans whose BDF exists and whose intents (partially) match
scannums = [
    int(scan.idx) for scan in scans
    if scan.bdf.exists and any([intent in scint for scint in scan.intents])
]
logger.info("Found {0} scans with intent {1} of total {2} scans".format(
    len(scannums), intent, len(scans)))
# Pick one matching scan at random
scannum = scannums[random.randint(0, len(scannums) - 1)]

band = metadata.sdmband(sdmfile=sdmname, sdmscan=scannum)

# Build a State with band-specific default preferences; on ValueError,
# retry with spw selection disabled (presumably the preffile spw choice
# was invalid for this data -- confirm).
try:
    st = state.State(sdmfile=sdmname,
                     sdmscan=scannum,
                     preffile=preffile,
                     name='NRAOdefault' + band,
                     showsummary=False)
except ValueError:
    prefs = {'spw': None}
    st = state.State(sdmfile=sdmname,
                     sdmscan=scannum,
                     preffile=preffile,
                     name='NRAOdefault' + band,
                     showsummary=False,
                     inprefs=prefs)

st.prefs.gainfile = gainfile
st.prefs.workdir = '/hyrule/data/users/kshitij/fetchrf/sim_frbs/'  #fetch_data_dir+datasetId
logging.info('Working directory set to {0}'.format(st.prefs.workdir))
def inject_one(preffile, devicenum, outdir):
    """
    Script to inject one simulated FRB on simulated data and save unclustered candidates.

    :param preffile: Preference file with search preferences
    :param devicenum: GPU devicenumber
    :param outdir: Output directory
    :return: candidate collection from the (possibly retried) search
    """
    # Randomly pick an array configuration and receiver band to simulate
    configs = ["A", "B", "C", "D"]
    bands = ["L", "S", "X", "C"]
    config = configs[np.random.randint(len(configs))]
    band = bands[np.random.randint(len(bands))]

    # 10 seconds of mock data starting now
    t0 = time.Time.now().mjd
    meta = metadata.mock_metadata(
        t0,
        t0 + 10 / (24 * 3600),
        20,
        11,
        32 * 4 * 2,
        2,
        5e3,
        scan=1,
        datasource="sim",
        antconfig=config,
        band=band,
    )

    dataset = meta["datasetId"] + "_config_" + config + "_band_" + band

    workdir = outdir + "/" + dataset

    try:
        os.mkdir(workdir)
    except FileExistsError:
        logging.info("Directory {0} exists, using it.".format(workdir))
    except OSError:
        logging.info("Can't create directory {0}".format(workdir))
    else:
        logging.info("Created directory {0}".format(workdir))

    prefs = {}
    prefs["workdir"] = workdir
    prefs["savenoise"] = False
    prefs["fftmode"] = "fftw"
    prefs["nthread"] = 10
    # NOTE(review): the two "badchtslide" entries differ only in 4.0 vs 4;
    # likely a duplicate -- confirm before removing.
    prefs["flaglist"] = [
        ("badchtslide", 4.0, 20),
        ("badchtslide", 4, 20),
        ("badspw", 4.0),
        ("blstd", 3.5, 0.008),
    ]

    st = state.State(
        inmeta=meta,
        showsummary=False,
        preffile=preffile,
        name="NRAOdefault" + band,
        inprefs=prefs,
    )
    segment = 0
    data = source.read_segment(st, segment)
    dmind = None
    dtind = None
    # Random injected SNR; bumped by 5 on retry below
    snr = np.random.uniform(low=10, high=40)
    mock = util.make_transient_params(st,
                                      snr=snr,
                                      segment=segment,
                                      data=data,
                                      ntr=1,
                                      lm=-1,
                                      dmind=dmind,
                                      dtind=dtind)

    st.clearcache()
    st.prefs.simulated_transient = mock

    cc = pipeline.pipeline_seg(st=st, segment=segment, devicenum=devicenum)

    if not len(cc):
        logging.info(
            "No candidate found. Deleting the empty pickle, and trying with a higher SNR now."
        )
        # Bug fix: guard against no pickle having been written (the
        # original indexed glob(...)[0] unconditionally -> IndexError).
        pkls = glob.glob(cc.state.prefs.workdir + "/*pkl")
        if pkls:
            try:
                os.remove(pkls[0])
            except OSError:
                pass
        snr = snr + 5
        mock = util.make_transient_params(
            st,
            snr=snr,
            segment=segment,
            data=data,
            ntr=1,
            lm=-1,
            dmind=dmind,
            dtind=dtind,
        )

        st.clearcache()
        st.prefs.simulated_transient = mock
        cc = pipeline.pipeline_seg(st=st, segment=segment, devicenum=devicenum)

    # Return the result so callers can inspect the candidates
    return cc