Example #1
def pipeline_scan(st,
                  segments=None,
                  cfile=None,
                  vys_timeout=vys_timeout_default,
                  devicenum=None):
    """ Given rfpipe state run search pipline on all segments in a scan.
        state/preference has fftmode that will determine functions used here.
    """

    from rfpipe import candidates

    # initialize with empty cc
    candcollection = candidates.CandCollection(prefs=st.prefs,
                                               metadata=st.metadata)

    if not isinstance(segments, list):
        segments = list(range(st.nsegment))

    for segment in segments:
        candcollection += pipeline_seg(st,
                                       segment,
                                       devicenum=devicenum,
                                       cfile=cfile,
                                       vys_timeout=vys_timeout)

    return candcollection
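
A minimal usage sketch (the SDM filename is hypothetical; assumes rfpipe is installed and an SDM file is available locally):

from rfpipe import state

st = state.State(sdmfile='mydata.sdm', sdmscan=1)  # hypothetical SDM path
cc = pipeline_scan(st, segments=[0, 1])  # search only the first two segments
print("{0} candidates found".format(len(cc)))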
Example #2
def reproduce_candcollection(cc, data, wisdom=None):
    """ Calculates canddata for each cand in candcollection.
    Will look for cluster label and filter only for peak snr, if available.
    Location (e.g., integration, dm, dt) of each is used to create
    canddata for each candidate.
    """

    # set up output cc
    st = cc.state
    cc1 = candidates.CandCollection(prefs=st.prefs, metadata=st.metadata)

    if len(cc):
        candlocs = cc.locs
        snrs = cc.snrtot

        if 'cluster' in cc.array.dtype.fields:
            clusters = cc.array['cluster'].astype(int)
            cl_rank, cl_count = candidates.calc_cluster_rank(cc)
            calcinds = np.unique(np.where(cl_rank == 1)[0])
            logger.debug("Reproducing cands at {0} cluster peaks"
                         .format(len(calcinds)))
        else:
            logger.debug("No cluster field found. Reproducing all.")
            calcinds = list(range(len(cc)))

        # reproduce canddata for each
        calcinds.sort()
        for i in calcinds:
            # TODO: check on best way to find max SNR with kalman, etc
            snr = snrs[i]
            candloc = candlocs[i]

            # kwargs passed to canddata object for plotting/saving
            kwargs = {}
            if 'cluster' in cc.array.dtype.fields:
                logger.info("Cluster {0}/{1} has {2} candidates and max SNR {3} at {4}"
                            .format(clusters[i], len(calcinds)-1, cl_count[i],
                                    snr, candloc))
                # add supplementary plotting and cc info
                kwargs['cluster'] = clusters[i]
                kwargs['clustersize'] = cl_count[i]
            else:
                logger.info("Candidate {0}/{1} has SNR {2} at {3}"
                            .format(i, len(calcinds)-1, snr, candloc))

            # TODO: reproduce these here, too
            for kw in ['snrk', 'snrarms']:
                if kw in cc.array.dtype.fields:
                    kwargs[kw] = cc.array[kw][i]

            # reproduce candidate
            data_corr = rfpipe.reproduce.pipeline_datacorrect(st, candloc, data)
            cd = rfpipe.reproduce.pipeline_imdata(st, candloc, data_corr,
                                                  cpuonly=True, **kwargs)
            cc1 += candidates.save_and_plot(cd)

            # TODO: validate that reproduced features match input features?
    #        peakx, peaky = np.where(image[0] == image[0].max())
    #        l1, m1 = st.calclm(st.npixx_full, st.npixy_full,
    #                           st.uvres, peakx[0], peaky[0])
    #        immax1 = image.max()
    #        snr1 = immax1/image.std()

    return cc1
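
The cluster-peak selection above comes from candidates.calc_cluster_rank. A standalone numpy sketch of the same idea, with made-up SNRs and cluster labels (rank 1 marks the per-cluster SNR peak):

import numpy as np

snrs = np.array([5.1, 9.3, 6.0, 7.7])
clusters = np.array([0, 0, 1, 1])

# rank each candidate within its cluster by SNR (1 = highest)
cl_rank = np.array([np.sum(snrs[clusters == c] >= s)
                    for c, s in zip(clusters, snrs)])
calcinds = np.unique(np.where(cl_rank == 1)[0])
print(calcinds)  # indices of the per-cluster peaks -> [1 3]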
Example #3
def dedisperse_search_cuda(st, segment, data, devicenum=None):
    """ Run dedispersion, resample for all dm and dt.
    Grid and image on GPU.
    rfgpu is built from separate repo.
    Uses state to define integrations to image based on segment, dm, and dt.
    devicenum can force the gpu to use, but can be inferred via distributed.
    """

    assert st.dtarr[0] == 1, "st.dtarr[0] assumed to be 1"
    assert all([st.dtarr[dtind]*2 == st.dtarr[dtind+1]
                for dtind in range(len(st.dtarr)-1)]), ("dtarr must increase "
                                                        "by factors of 2")

    if not np.any(data):
        logger.info("Data is all zeros. Skipping search.")
        return candidates.CandCollection(prefs=st.prefs,
                                         metadata=st.metadata)

    if devicenum is None:
        # assume first gpu, but try to infer from worker name
        devicenum = 0
        try:
            from distributed import get_worker
            name = get_worker().name
            devicenum = int(name.split('g')[1])
            logger.debug("Using name {0} to set GPU devicenum to {1}"
                         .format(name, devicenum))
        except IndexError:
            logger.warning("Could not parse worker name {0}. Using default GPU devicenum {1}"
                           .format(name, devicenum))
        except ValueError:
            logger.warning("No worker found. Using default GPU devicenum {0}"
                           .format(devicenum))
        except ImportError:
            logger.warning("distributed not available. Using default GPU devicenum {0}"
                           .format(devicenum))

    rfgpu.cudaSetDevice(devicenum)

    beamnum = 0
    uvw = util.get_uvw_segment(st, segment)

    upix = st.npixx
    vpix = st.npixy//2 + 1

    grid = rfgpu.Grid(st.nbl, st.nchan, st.readints, upix, vpix)
    image = rfgpu.Image(st.npixx, st.npixy)
    image.add_stat('rms')
    image.add_stat('pix')

    # Data buffers on GPU
    vis_raw = rfgpu.GPUArrayComplex((st.nbl, st.nchan, st.readints))
    vis_grid = rfgpu.GPUArrayComplex((upix, vpix))
    img_grid = rfgpu.GPUArrayReal((st.npixx, st.npixy))

    # Convert uv from lambda to us
    u, v, w = uvw
    u_us = 1e6*u[:, 0]/(1e9*st.freq[0])
    v_us = 1e6*v[:, 0]/(1e9*st.freq[0])

    # Q: set input units to be uv (lambda), freq in GHz?
    grid.set_uv(u_us, v_us)  # u, v in us
    grid.set_freq(st.freq*1e3)  # freq in MHz
    grid.set_cell(st.uvres)  # uv cell size in wavelengths (== 1/FoV(radians))

    # Compute gridding transform
    grid.compute()

    # move Stokes I data in (assumes dual pol data)
    vis_raw.data[:] = np.rollaxis(data.mean(axis=3), 0, 3)
    vis_raw.h2d()  # Send it to GPU memory

    grid.conjugate(vis_raw)

    # some prep if kalman filter is to be applied
    if st.prefs.searchtype in ['imagek']:
        # TODO: check that this is ok if pointing at bright source
        spec_std = data.real.mean(axis=1).mean(axis=2).std(axis=0)
        sig_ts, kalman_coeffs = kalman_prepare_coeffs(spec_std)
        if not np.all(sig_ts):
            logger.info("sig_ts all zeros. Skipping search.")
            return candidates.CandCollection(prefs=st.prefs,
                                             metadata=st.metadata)

    # place to hold intermediate result lists
    canddict = {}
    canddict['candloc'] = []
    for feat in st.features:
        canddict[feat] = []

    for dtind in range(len(st.dtarr)):
        if dtind > 0:
            grid.downsample(vis_raw)

        for dmind in range(len(st.dmarr)):
            delay = util.calc_delay(st.freq, st.freq.max(), st.dmarr[dmind],
                                    st.inttime)

            grid.set_shift(delay >> dtind)  # dispersion shift per chan in samples

            integrations = st.get_search_ints(segment, dmind, dtind)
            if len(integrations) == 0:
                continue
            minint = min(integrations)
            maxint = max(integrations)

            logger.info('Imaging {0} ints ({1}-{2}) in seg {3} at DM/dt {4:.1f}/{5}'
                        ' with image {6}x{7} (uvres {8}) with gpu {9}'
                        .format(len(integrations), minint, maxint, segment,
                                st.dmarr[dmind], st.dtarr[dtind], st.npixx,
                                st.npixy, st.uvres, devicenum))

            for i in integrations:
                # grid and FFT
                grid.operate(vis_raw, vis_grid, i)
                image.operate(vis_grid, img_grid)

                # calc snr
                stats = image.stats(img_grid)
                if stats['rms'] != 0.:
                    snr1 = stats['max']/stats['rms']
                else:
                    snr1 = 0.
                    logger.warn("rfgpu rms is 0 in int {0}. Skipping.".format(i))

                # threshold image
                if snr1 > st.prefs.sigma_image1:
                    candloc = (segment, i, dmind, dtind, beamnum)

                    xpeak = stats['xpeak']
                    ypeak = stats['ypeak']
                    l1, m1 = st.pixtolm((xpeak+st.npixx//2, ypeak+st.npixy//2))

                    if st.prefs.searchtype == 'image':
                        logger.info("Got one! SNR1 {0:.1f} candidate at {1} and (l, m) = ({2},{3})"
                                    .format(snr1, candloc, l1, m1))
                        canddict['candloc'].append(candloc)
                        canddict['l1'].append(l1)
                        canddict['m1'].append(m1)
                        canddict['snr1'].append(snr1)
                        canddict['immax1'].append(stats['max'])

                    elif st.prefs.searchtype == 'imagek':
                        # TODO: implement phasing on GPU
                        data_corr = dedisperseresample(data, delay,
                                                       st.dtarr[dtind],
                                                       parallel=st.prefs.nthread > 1,
                                                       resamplefirst=st.fftmode=='cuda')
                        spec = data_corr.take([i], axis=0)
                        util.phase_shift(spec, uvw, l1, m1)
                        spec = spec[0].real.mean(axis=2).mean(axis=0)

                        # TODO: this significance can be biased low if averaging in long baselines that are not phased well
                        # TODO: spec should be calculated from baselines used to measure l,m?
                        significance_kalman = kalman_significance(spec,
                                                                  spec_std,
                                                                  sig_ts=sig_ts,
                                                                  coeffs=kalman_coeffs)
                        snrk = (2*significance_kalman)**0.5
                        snrtot = (snrk**2 + snr1**2)**0.5
                        if snrtot > (st.prefs.sigma_kalman**2 + st.prefs.sigma_image1**2)**0.5:
                            logger.info("Got one! SNR1 {0:.1f} and SNRk {1:.1f} candidate at {2} and (l,m) = ({3},{4})"
                                        .format(snr1, snrk, candloc, l1, m1))
                            canddict['candloc'].append(candloc)
                            canddict['l1'].append(l1)
                            canddict['m1'].append(m1)
                            canddict['snr1'].append(snr1)
                            canddict['immax1'].append(stats['max'])
                            canddict['snrk'].append(snrk)
                    elif st.prefs.searchtype == 'armkimage':
                        raise NotImplementedError
                    elif st.prefs.searchtype == 'armk':
                        raise NotImplementedError
                    else:
                        logger.warn("searchtype {0} not recognized"
                                    .format(st.prefs.searchtype))

    cc = candidates.make_candcollection(st, **canddict)
    logger.info("First pass found {0} candidates in seg {1}."
                .format(len(cc), segment))

    if st.prefs.clustercands is not None:
        cc = candidates.cluster_candidates(cc)

    if st.prefs.savecands or st.prefs.saveplots:
        # triggers optional plotting and saving
        cc = reproduce_candcollection(cc, data)

    candidates.save_cands(st, candcollection=cc)

    return cc
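
A worked check of the two-stage cut in the 'imagek' branch above: image and kalman SNRs are combined in quadrature and compared to the quadrature sum of the two configured thresholds (the numbers here are illustrative):

snr1, snrk = 8.0, 4.0                  # image and kalman SNRs
sigma_image1, sigma_kalman = 7.0, 3.0  # illustrative preference values

snrtot = (snrk**2 + snr1**2)**0.5                     # ~8.94
threshold = (sigma_kalman**2 + sigma_image1**2)**0.5  # ~7.62
print(snrtot > threshold)  # True -> candidate passes the combined cut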
Example #4
def dedisperse_search_fftw(st, segment, data, wisdom=None):
    """ Fuse the dediserpse, resample, search, threshold functions.
    Returns list of CandData objects that define candidates with
    candloc, image, and phased visibility data.
    Integrations can define subset of all available in data to search.
    Default will take integrations not searched in neighboring segments.

    ** only supports threshold > image max (no min)
    ** dmind, dtind, beamnum assumed to represent current state of data
    """

    if not np.any(data):
        logger.info("Data is all zeros. Skipping search.")
        return candidates.CandCollection(prefs=st.prefs,
                                         metadata=st.metadata)

    # some prep if kalman filter is to be applied
    if st.prefs.searchtype in ['imagek', 'armk', 'armkimage']:
        # TODO: check that this is ok if pointing at bright source
        spec_std = data.real.mean(axis=1).mean(axis=2).std(axis=0)
        sig_ts, kalman_coeffs = kalman_prepare_coeffs(spec_std)

    beamnum = 0
    uvw = util.get_uvw_segment(st, segment)

    # place to hold intermediate result lists
    canddict = {}
    canddict['candloc'] = []
    for feat in st.features:
        canddict[feat] = []

    for dtind in range(len(st.dtarr)):
        for dmind in range(len(st.dmarr)):
            # set search integrations
            integrations = st.get_search_ints(segment, dmind, dtind)
            if len(integrations) == 0:
                continue
            minint = min(integrations)
            maxint = max(integrations)

            logger.info('{0} search of {1} ints ({2}-{3}) in seg {4} at DM/dt '
                        '{5:.1f}/{6} with image {7}x{8} (uvres {9}) with fftw'
                        .format(st.prefs.searchtype, len(integrations), minint,
                                maxint, segment, st.dmarr[dmind],
                                st.dtarr[dtind], st.npixx,
                                st.npixy, st.uvres))

            # correct data
            delay = util.calc_delay(st.freq, st.freq.max(), st.dmarr[dmind],
                                    st.inttime)
            data_corr = dedisperseresample(data, delay, st.dtarr[dtind],
                                           parallel=st.prefs.nthread > 1,
                                           resamplefirst=st.fftmode=='cuda')

            # run search
            if st.prefs.searchtype in ['image', 'imagek']:
                images = grid_image(data_corr, uvw, st.npixx, st.npixy, st.uvres,
                                    'fftw', st.prefs.nthread, wisdom=wisdom,
                                    integrations=integrations)

                for i, image in enumerate(images):
                    immax1 = image.max()
                    snr1 = immax1/image.std()
                    if snr1 > st.prefs.sigma_image1:
                        candloc = (segment, integrations[i], dmind, dtind, beamnum)
                        l1, m1 = st.pixtolm(np.where(image == immax1))

                        # if set, use sigma_kalman as second stage filter
                        if st.prefs.searchtype == 'imagek':
                            spec = data_corr.take([integrations[i]], axis=0)
                            util.phase_shift(spec, uvw, l1, m1)
                            spec = spec[0].real.mean(axis=2).mean(axis=0)
                            # TODO: this significance can be biased low if averaging in long baselines that are not phased well
                            # TODO: spec should be calculated from baselines used to measure l,m?
                            significance_kalman = kalman_significance(spec,
                                                                      spec_std,
                                                                      sig_ts=sig_ts,
                                                                      coeffs=kalman_coeffs)
                            snrk = (2*significance_kalman)**0.5
                            snrtot = (snrk**2 + snr1**2)**0.5
                            if snrtot > (st.prefs.sigma_kalman**2 + st.prefs.sigma_image1**2)**0.5:
                                logger.info("Got one! SNR1 {0:.1f} and SNRk {1:.1f} candidate at {2} and (l,m) = ({3},{4})"
                                            .format(snr1, snrk, candloc, l1, m1))
                                canddict['candloc'].append(candloc)
                                canddict['l1'].append(l1)
                                canddict['m1'].append(m1)
                                canddict['snr1'].append(snr1)
                                canddict['immax1'].append(immax1)
                                canddict['snrk'].append(snrk)
                        elif st.prefs.searchtype == 'image':
                            logger.info("Got one! SNR1 {0:.1f} candidate at {1} and (l, m) = ({2},{3})"
                                        .format(snr1, candloc, l1, m1))
                            canddict['candloc'].append(candloc)
                            canddict['l1'].append(l1)
                            canddict['m1'].append(m1)
                            canddict['snr1'].append(snr1)
                            canddict['immax1'].append(immax1)

            elif st.prefs.searchtype in ['armkimage', 'armk']:
                armk_candidates = search_thresh_armk(st, data_corr, uvw,
                                                     integrations=integrations,
                                                     spec_std=spec_std,
                                                     sig_ts=sig_ts,
                                                     coeffs=kalman_coeffs)

                for candind, snrarms, snrk, armloc, peakxy, lm in armk_candidates:
                    candloc = (segment, candind, dmind, dtind, beamnum)

                    # if set, use sigma_kalman as second stage filter
                    if st.prefs.searchtype == 'armkimage':
                        image = grid_image(data_corr, uvw, st.npixx_full,
                                           st.npixy_full, st.uvres, 'fftw',
                                           st.prefs.nthread,
                                           wisdom=wisdom, integrations=candind)
                        peakx, peaky = np.where(image[0] == image[0].max())
                        l1, m1 = st.calclm(st.npixx_full, st.npixy_full,
                                           st.uvres, peakx[0], peaky[0])
                        immax1 = image.max()
                        snr1 = immax1/image.std()
                        if snr1 > st.prefs.sigma_image1:
                            logger.info("Got one! SNRarms {0:.1f} and SNRk "
                                        "{1:.1f} and SNR1 {2:.1f} candidate at"
                                        " {3} and (l,m) = ({4},{5})"
                                        .format(snrarms, snrk, snr1,
                                                candloc, l1, m1))
                            canddict['candloc'].append(candloc)
                            canddict['l1'].append(l1)
                            canddict['m1'].append(m1)
                            canddict['snrarms'].append(snrarms)
                            canddict['snrk'].append(snrk)
                            canddict['snr1'].append(snr1)
                            canddict['immax1'].append(immax1)

                    elif st.prefs.searchtype == 'armk':
                        l1, m1 = lm
                        logger.info("Got one! SNRarms {0:.1f} and SNRk {1:.1f} "
                                    "candidate at {2} and (l,m) = ({3},{4})"
                                    .format(snrarms, snrk, candloc, l1, m1))
                        canddict['candloc'].append(candloc)
                        canddict['l1'].append(l1)
                        canddict['m1'].append(m1)
                        canddict['snrarms'].append(snrarms)
                        canddict['snrk'].append(snrk)
            else:
                raise NotImplementedError("only searchtype=image, imagek, armk, armkimage implemented")

    cc = candidates.make_candcollection(st, **canddict)
    logger.info("First pass found {0} candidates in seg {1}."
                .format(len(cc), segment))

    if st.prefs.clustercands is not None:
        cc = candidates.cluster_candidates(cc)

    if st.prefs.savecands or st.prefs.saveplots:
        # triggers optional plotting and saving
        cc = reproduce_candcollection(cc, data)

    candidates.save_cands(st, candcollection=cc)

    return cc
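
Both search functions shift channels by the output of util.calc_delay before imaging. A standalone sketch of what such a delay calculation plausibly looks like (standard cold-plasma dispersion, rounded to integration samples; the constant and rounding here are assumptions, not rfpipe's exact implementation):

import numpy as np

def calc_delay_sketch(freq, freqref, dm, inttime):
    """ Per-channel dispersion delay in integration samples (freq in GHz). """
    kdm = 4.1488e-3  # s GHz^2 cm^3 / pc, standard dispersion constant
    delay_s = kdm * dm * (freq**-2 - freqref**-2)
    return np.round(delay_s / inttime).astype(np.int32)

freq = np.linspace(1.0, 2.0, 4)  # GHz
print(calc_delay_sketch(freq, freq.max(), dm=100., inttime=0.005))
# lowest channel is delayed most; the reference (highest) channel gets 0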
Example #5
def oldcands_readone(candsfile, scan=None):
    """ Reads old-style candidate files to create new state and candidate
    collection for a given scan.
    Parsing a merged cands file requires the SDM (with BDF) locally for the
    given scan. If no scan is provided, assumes candsfile is from a single
    scan, not merged.
    """

    from rfpipe import preferences, metadata, state, candidates

    with open(candsfile, 'rb') as pkl:
        try:
            d = pickle.load(pkl)
            ret = pickle.load(pkl)
        except UnicodeDecodeError:
            d = pickle.load(pkl, encoding='latin-1')
            ret = pickle.load(pkl, encoding='latin-1')
        if isinstance(ret, tuple):
            loc, prop = ret
        elif isinstance(ret, dict):
            loc = np.array(list(ret.keys()))
            prop = np.array(list(ret.values()))
        else:
            logger.warning(
                "Not sure what we've got in this here cands pkl file...")

    # detect merged vs nonmerged
    if 'scan' in d['featureind']:
        locind0 = 1
    else:
        locind0 = 0

    # merged candsfiles must be called with scan arg
    if scan is None:
        assert locind0 == 0, "Set scan if candsfile has multiple scans."

    inprefs = preferences.oldstate_preferences(d, scan=scan)
    inprefs.pop('gainfile')
    inprefs.pop('workdir')
    inprefs.pop('fileroot')
    sdmfile = os.path.basename(d['filename'])

    try:
        assert scan is not None
        st = state.State(sdmfile=sdmfile, sdmscan=scan, inprefs=inprefs)
    except Exception:
        meta = metadata.oldstate_metadata(d, scan=scan)
        st = state.State(inmeta=meta, inprefs=inprefs, showsummary=False)

    if 'rtpipe_version' in d:
        st.rtpipe_version = float(d['rtpipe_version'])  # TODO test this
        if st.rtpipe_version <= 1.54:
            logger.info('Candidates detected with rtpipe version {0}. All '
                        'versions <=1.54 used incorrect DM scaling.'.format(
                            st.rtpipe_version))

    if scan is None:
        assert locind0 == 0, "Set scan if candsfile has multiple scans."
        scan = d['scan']

    logger.info('Calculating candidate properties for scan {0}'.format(scan))

    if locind0 == 1:
        loc = loc[np.where(loc[:, 0] == scan)][:, locind0:]

    logger.debug("Features: {0}, searchtype: {1}".format(st.features,
                                                         st.prefs.searchtype))
    fields = [str(ff) for ff in st.search_dimensions + st.features]
    types = [
        str(tt) for tt in len(st.search_dimensions) * ['<i4'] +
        len(st.features) * ['<f4']
    ]
    dtype = np.dtype({'names': fields, 'formats': types})
    features = np.zeros(len(loc), dtype=dtype)
    for i in range(len(loc)):
        features[i] = tuple(list(loc[i]) + list(prop[i]))
    cc = candidates.CandCollection(features, st.prefs, st.metadata)

    return st, cc
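
The structured-array construction at the end of this example follows a standard numpy pattern; a standalone version with made-up values (field names follow the candloc tuple used in these examples):

import numpy as np

fields = ['segment', 'integration', 'dmind', 'dtind', 'beamnum', 'snr1']
types = 5*['<i4'] + 1*['<f4']
dtype = np.dtype({'names': fields, 'formats': types})

loc = np.array([[0, 11, 2, 0, 0], [0, 54, 3, 1, 0]])  # search dimensions
prop = np.array([[8.1], [9.4]])                       # feature values
features = np.zeros(len(loc), dtype=dtype)
for i in range(len(loc)):
    features[i] = tuple(list(loc[i]) + list(prop[i]))
print(features['snr1'])  # -> [8.1 9.4]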
Example #6
def reproduce_candcollection(cc,
                             data=None,
                             wisdom=None,
                             spec_std=None,
                             sig_ts=[],
                             kalman_coeffs=[]):
    """ Uses candcollection to make new candcollection with required info.
    Will look for cluster label and filter only for peak snr, if available.
    Location (e.g., integration, dm, dt) of each is used to create
    canddata for each candidate, if required.
    Can calculate features not used directly in the search (as defined in
    state.prefs.calcfeatures).
    """

    from rfpipe import candidates, util

    # set up output cc
    st = cc.state
    cc1 = candidates.CandCollection(prefs=st.prefs, metadata=st.metadata)

    if len(cc):
        if 'cluster' in cc.array.dtype.fields:
            clusters = cc.array['cluster'].astype(int)
            cl_rank, cl_count = candidates.calc_cluster_rank(cc)
            calcinds = np.unique(np.where(cl_rank == 1)[0]).tolist()
            logger.debug("Reproducing cands at {0} cluster peaks".format(
                len(calcinds)))
        else:
            logger.debug("No cluster field found. Reproducing all.")
            calcinds = list(range(len(cc)))

        # if candidates need new feature calculations
        if not all([f in cc.array.dtype.fields for f in st.features]):
            logger.info("Generating canddata for {0} candidates".format(
                len(calcinds)))

            candlocs = cc.locs
            snrs = cc.snrtot
            normprob = candidates.normprob(snrs, st.ntrials)
            snrmax = snrs.max()
            logger.info('Zscore/SNR for strongest candidate: {0}/{1}'.format(
                normprob[np.where(snrs == snrmax)[0]][0], snrmax))

            if ('snrk' in st.features and 'snrk' not in cc.array.dtype.fields
                    and (spec_std is None or not len(sig_ts)
                         or not len(kalman_coeffs))):
                # TODO: use same kalman calc for search as reproduce?
                spec_std, sig_ts, kalman_coeffs = util.kalman_prep(data)

            # reproduce canddata for each
            for i in calcinds:
                # TODO: check on best way to find max SNR with kalman, etc
                snr = snrs[i]
                candloc = candlocs[i]

                # kwargs passed to canddata object for plotting/saving
                kwargs = {}
                if 'cluster' in cc.array.dtype.fields:
                    logger.info(
                        "Cluster {0}/{1} has {2} candidates and max detected SNR {3:.1f} at {4}"
                        .format(calcinds.index(i),
                                len(calcinds) - 1, cl_count[i], snr, candloc))
                    # add supplementary plotting and cc info
                    kwargs['cluster'] = clusters[i]
                    kwargs['clustersize'] = cl_count[i]
                else:
                    logger.info(
                        "Candidate {0}/{1} has detected SNR {2:.1f} at {3}".
                        format(calcinds.index(i),
                               len(calcinds) - 1, snr, candloc))

                # reproduce candidate and get/calc features
                data_corr = pipeline_datacorrect(st, candloc, data_prep=data)

                for feature in st.features:
                    if feature in cc.array.dtype.fields:  # if already calculated
                        kwargs[feature] = cc.array[feature][i]
                    else:  # if desired, but not calculated here or from canddata
                        if feature == 'snrk':
                            if 'snrk' not in cc.array.dtype.fields:
                                spec = data_corr.real.mean(axis=3).mean(
                                    axis=1)[candloc[1]]

                                if np.count_nonzero(spec) / len(
                                        spec) > 1 - st.prefs.max_zerofrac:
                                    significance_kalman = -kalman_significance(
                                        spec,
                                        spec_std,
                                        sig_ts=sig_ts,
                                        coeffs=kalman_coeffs)
                                    snrk = (2 * significance_kalman)**0.5
                                else:
                                    logger.warning(
                                        "snrk set to 0, since {0}/{1} are zeroed"
                                        .format(
                                            len(spec) - np.count_nonzero(spec),
                                            len(spec)))
                                    snrk = 0.
                                logger.info(
                                    "Calculated snrk of {0} after detection. "
                                    "Adding it to CandData.".format(snrk))
                                kwargs[feature] = snrk

                cd = pipeline_canddata(st,
                                       candloc,
                                       data_corr,
                                       spec_std=spec_std,
                                       sig_ts=sig_ts,
                                       kalman_coeffs=kalman_coeffs,
                                       **kwargs)

                if st.prefs.saveplots:
                    candidates.candplot(cd,
                                        snrs=snrs)  # snrs before clustering

                # regenerate cc with extra features in cd
                cc1 += candidates.cd_to_cc(cd)

        # if candidates do not need new features, just select peaks
        else:
            logger.info(
                "Using clustering info to select {0} candidates".format(
                    len(calcinds)))
            cc1.array = cc.array.take(calcinds)

    return cc1
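
A worked check of the zero-fraction guard in the 'snrk' branch above: the kalman SNR is only computed when the fraction of nonzero spectrum samples exceeds 1 - prefs.max_zerofrac (the preference value below is illustrative):

import numpy as np

spec = np.array([0., 0.3, 0., 0.7, 0.5])
max_zerofrac = 0.3  # illustrative preference value

nonzero_frac = np.count_nonzero(spec) / len(spec)  # 3/5 = 0.6
print(nonzero_frac > 1 - max_zerofrac)  # False -> snrk is set to 0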