def _prepare(self, ids, is_train=True, validation=False, cache=True):
        fname = "train_nmf" if is_train else "test_nmf"
        if validation:
            fname = "validation_nmf"
        filename = "%s%s.pkl" % (fname,
                                 "_smooth" if self.smooth_signal else "")
        tcache_path = os.path.join(self._base_path, filename)
        if os.path.exists(tcache_path):
            with open(tcache_path, "rb") as fd:
                return pkl.load(fd)
        tpath = self._train_dir_path if is_train else self._test_dir_path
        ret = []
        for id in tqdm(ids, desc=fname):
            for tscale in TSCALE_LIST:
                dat = pd.read_csv(os.path.join(tpath, "%d.csv" % id),
                                  header=None)

                d0 = dat[0] * 1e-6   # inter-arrival times, us -> s
                d1 = np.cumsum(d0)   # absolute arrival time of each photon (s)
                d2 = dat[1]          # photon energy (keV)
                invtscale = 1 / tscale   # window length in seconds
                tmax = int(d1.values[-1] * tscale)   # number of windows

                bins = EBINS
                zmat = np.zeros((tmax, bins))

                ebins = np.linspace(0, MAX_ENERGY, bins + 1)

                for i in range(int(TOFFS * tscale), tmax):
                    dind = np.argwhere((d1 > i * invtscale)
                                       & (d1 < (i + 1) * invtscale)).flatten()
                    d3 = d2[dind]
                    hist = np.histogram(d3, bins=ebins)[0]
                    if self.smooth_signal:
                        hist = denoise_signal(hist)
                    zmat[i, :] = hist
                ret.append(zmat)
        if cache:
            with open(tcache_path, "wb") as fd:
                pkl.dump(ret, fd)
        return ret
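
# A minimal standalone sketch (not part of the original class) of the
# windowing above: max_energy/ebins stand in for the repo's MAX_ENERGY/EBINS
# constants, arrival_s is the cumulative arrival time in seconds, and
# energy_kev the per-photon energy, as derived from the CSV columns above.
import numpy as np

def windowed_spectra(arrival_s, energy_kev, tscale=1.0, toffs=30,
                     ebins=128, max_energy=2500):
    invtscale = 1 / tscale
    tmax = int(arrival_s[-1] * tscale)
    edges = np.linspace(0, max_energy, ebins + 1)  # ebins + 1 edges
    zmat = np.zeros((tmax, ebins))
    for i in range(int(toffs * tscale), tmax):
        # photons whose arrival time falls inside window i
        mask = (arrival_s > i * invtscale) & (arrival_s < (i + 1) * invtscale)
        zmat[i, :] = np.histogram(energy_kev[mask], bins=edges)[0]
    return zmat
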
    def __init__(self, base_path):
        alg_radmm_base.AlgRadMMBase.__init__(self, base_path)
        self.smooth_signal = False

        # Build a normalized reference spectrum for each of the
        # 2 shieldings x 5 sources from the tabulated per-bin count rates.
        sdata = self._source_data
        self.source_hist = np.zeros((10, EBINS))
        for shielding in range(2):
            for source in range(5):
                arr = []
                for binidx in range(EBINS):
                    energyFrom = binidx / EBINS * MAX_ENERGY
                    energyTo = (binidx + 1) / EBINS * MAX_ENERGY
                    dat = sdata[(sdata["Shielding"] == shielding)
                                & (sdata["SourceID"] == source + 1) &
                                (sdata["PhotonEnergy"] > energyFrom) &
                                (sdata["PhotonEnergy"] < energyTo)]
                    arr.append(dat["CountRate"].mean())
                dat1 = denoise_signal(
                    np.array(arr)) if self.smooth_signal else np.array(arr)
                self.source_hist[shielding * 5 + source, :] = np.abs(dat1)
                self.source_hist[shielding * 5 + source, :] /= np.max(
                    self.source_hist[shielding * 5 + source, :])

        kev_per_bin = int(MAX_ENERGY / EBINS)
        self.bin_map_arr = []
        for i in range(len(SOURCE_METADATA)):
            bin_map = dict()
            for elem in SOURCE_METADATA[i]:
                from_idx = _rdown(elem[0], kev_per_bin)
                to_idx = _rup(elem[1], kev_per_bin)
                for idx in range(from_idx, to_idx + 1):
                    bin_map[idx] = 1
            self.bin_map_arr.append(bin_map)
        min_mp_sz = min([len(mp) for mp in self.bin_map_arr])
        self.weigh_thresh_arr = []
        self.weigh_bin_map_arr = []
        for i in range(len(self.bin_map_arr)):
            self.weigh_bin_map_arr.append(min_mp_sz / len(self.bin_map_arr[i]))
            self.bin_map_arr[i] = list(self.bin_map_arr[i])
            self.weigh_thresh_arr.append(len(self.bin_map_arr[i]) / EBINS)
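
# _rdown and _rup are referenced above but not included in this excerpt.
# A plausible reading, given kev_per_bin (an assumption, not the repo's
# code), is "map an energy in keV to a bin index, rounding down/up":
def _rdown(energy_kev, kev_per_bin):
    return int(energy_kev) // kev_per_bin        # floor to bin index

def _rup(energy_kev, kev_per_bin):
    return -(-int(energy_kev) // kev_per_bin)    # ceiling to bin index
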
    def _prepare(self, ids, is_train=True, validation=False, cache=True):
        filename = "train.pkl" if is_train else "test.pkl"
        if validation:
            filename = "validation.pkl"
        tcache_path = os.path.join(self._base_path, filename)
        if os.path.exists(tcache_path):
            with open(tcache_path, "rb") as fd:
                return pkl.load(fd)
        tpath = self._train_dir_path if is_train else self._test_dir_path
        ret = []
        for id in tqdm(ids):
            dat = pd.read_csv(os.path.join(tpath, "%d.csv" % id), header=None)

            d0 = dat[0] * 1e-6
            d1 = np.cumsum(d0)
            d2 = dat[1]
            tmax = int(d1.values[-1])

            bins = EBINS
            zmat = np.zeros((tmax - TOFFS, bins))

            # bins + 1 edges yield exactly `bins` histogram counts per
            # window, matching zmat's column count.
            ebins = np.linspace(0, MAX_ENERGY, bins + 1)

            for i in range(tmax - TOFFS):
                dind = np.argwhere((d1 > (TOFFS + i))
                                   & (d1 < (TOFFS + i + 1))).flatten()
                d3 = d2[dind]
                hist = np.histogram(d3, bins=ebins)[0]
                hist = denoise_signal(hist)
                zmat[i, :] = hist
            ret.append(zmat)
        if cache:
            with open(tcache_path, "wb") as fd:
                pkl.dump(ret, fd)
        return ret
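
# np.histogram returns one count per interval, i.e. len(edges) - 1 values,
# so a zmat with EBINS columns needs EBINS + 1 edges. A quick check of that
# invariant with illustrative values (EBINS=128, MAX_ENERGY=2500 assumed):
import numpy as np

edges = np.linspace(0, 2500, 128 + 1)            # 129 edges
counts = np.histogram([100.0, 900.0], bins=edges)[0]
assert counts.shape == (128,)                    # one count per bin
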
    def predict(self, x, ids, export=False):
        # NN fallback classifier (hard-coded checkpoint path).
        model = load_model(
            "/mnt/ssd/radiologicalthreatsmm/weight_01-val_acc0.820.h5")

        ret = np.zeros((len(ids), 2))

        nn_stat = []
        export_data = []

        for i in tqdm(range(len(ids))):
            id = ids[i]

            arr = []
            tiarr = []
            sourcearr = []
            tscalearr = []

            g_arr = []
            g_tiarr = []
            g_sourcearr = []
            g_tscalearr = []
            g_sres_bgs = []
            g_smooth_arr = []
            g_diff_fit_bg = []

            for is_smooth in range(1):
                for (j, tscale) in enumerate(TSCALE_LIST):
                    dat = np.abs(x[len(TSCALE_LIST) * i + j])
                    tmax = dat.shape[0]

                    if is_smooth:
                        dat = np.abs(denoise_signal(dat))

                    weigh = self.model_bg.transform(dat)

                    weigh_arr_s = []
                    for source in range(len(self.model_arr_bgs)):
                        weigh_arr_s.append(
                            self.model_arr_bgs[source].transform(dat))

                    # start at 30 s into the run (ti indexes 1/tscale-second
                    # windows)
                    for ti in range(int(30 * tscale), tmax):
                        fit_bg = np.dot(weigh[ti], self.comps_bg)
                        diff_fit_bg = fit_bg - dat[ti, :]

                        sres = []
                        sres_bg = []
                        sres_bgs = []
                        for source in range(len(self.model_arr_bgs)):
                            fit_bgs = np.dot(
                                weigh_arr_s[source][ti],
                                self.model_arr_bgs[source].components_)
                            diff_fit_bgs = fit_bgs - dat[ti, :]

                            norm_bg = self._calc_source_norm(
                                diff_fit_bg, source)
                            norm_bgs = self._calc_source_norm(
                                diff_fit_bgs, source)

                            sres.append(norm_bg / norm_bgs)
                            sres_bg.append(norm_bg)
                            sres_bgs.append(norm_bgs)

                        if sres:
                            sresi = np.argmax(sres)
                            coeff = SIGNAL_COEFF[sresi]
                            thresh = SIGNAL_THRESHOLD_ARR[sresi +
                                                          is_smooth * 6]
                            bgthresh = BG_THRESHOLD  #BG_THRESHOLD_ARR[sresi]
                            #if sres_bgs[sresi] > BG_THRESHOLD * coeff and sres[sresi] > SIGNAL_THRESHOLD * coeff:
                            if (sres_bgs[sresi] > bgthresh
                                    and sres[sresi] > thresh):
                                arr.append(sres[sresi])
                                tiarr.append(ti / tscale)
                                sourcearr.append(sresi)
                                tscalearr.append(tscale)

                            g_arr.append(sres[sresi])
                            g_tiarr.append(ti / tscale)
                            g_sourcearr.append(sresi)
                            g_tscalearr.append(tscale)
                            g_sres_bgs.append(sres_bgs[sresi])
                            g_smooth_arr.append(is_smooth)
                            g_diff_fit_bg.append(diff_fit_bg)

            if arr:
                idx = np.argmax(arr)
                ti = tiarr[idx]
                si = sourcearr[idx]
                toffs = 1 / tscalearr[idx] * 0.5  # center of the window
                ret[i, 0] = 1 + si
                ret[i, 1] = ti + toffs
                nn_stat.append((-1, -1))
            else:
                idx = np.argmax(g_arr)
                ti = g_tiarr[idx]
                si = g_sourcearr[idx]
                toffs = 1 / g_tscalearr[idx] * 0.5
                diff_fit_bg = np.abs(g_diff_fit_bg[idx][:NN_BINS])
                proba = model.predict(diff_fit_bg.reshape(-1, NN_BINS,
                                                          1))[0][0]
                if proba > NN_PROBA:
                    ret[i, 0] = 1 + si
                    ret[i, 1] = ti + toffs
                nn_stat.append((id, proba))

            export_data.append([
                g_arr, g_tiarr, g_sourcearr, g_tscalearr, g_sres_bgs,
                g_smooth_arr
            ])

        if export:
            tcache_path = os.path.join(self._base_path, "export.pkl")
            with open(tcache_path, "wb") as fd:
                pkl.dump(export_data, fd)

        if nn_stat:
            tnnstat_path = os.path.join(self._base_path, "nn_stat.pkl")
            with open(tnnstat_path, "wb") as fd:
                pkl.dump(nn_stat, fd)

        return ret
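
    # _calc_source_norm is not included in this excerpt. Given that __init__
    # builds bin_map_arr (each source's characteristic bin indices) and
    # weigh_bin_map_arr (a size-normalizing factor), one plausible sketch
    # (an assumption, not the repo's implementation) is a norm of the
    # residual restricted to those bins:
    def _calc_source_norm(self, diff, source):
        bins = self.bin_map_arr[source]          # bins where this source emits
        return np.linalg.norm(diff[bins]) * self.weigh_bin_map_arr[source]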
    def _get_train_tree_data(self, x, scaleidx, ids):
        tscale1 = TSCALE_LIST[scaleidx]
        sig_list = []
        bg_list = []
        for (i, runid) in enumerate(ids):
            source_id = self._train_metadata.loc[runid]["SourceID"]
            source_time = self._train_metadata.loc[runid]["SourceTime"]
            if source_id != 0:
                if source_time < TTHRESH:
                    continue
                if x[i * len(TSCALE_LIST) + 2].shape[0] < source_time + 5 + 5:
                    continue
                sig_list.append((i, runid))
            else:
                if x[i * len(TSCALE_LIST) + 2].shape[0] < (TTHRESH + 5 + 5):
                    continue
                bg_list.append((i, runid))
        np.random.shuffle(sig_list)
        np.random.shuffle(bg_list)

        min_sz = min(len(sig_list), len(bg_list))
        sig_list = sig_list[:min_sz]
        bg_list = bg_list[:min_sz]

        tpath = self._train_dir_path

        xlist = []
        ylist = []
        for elem in ((1, sig_list), (0, bg_list)):
            for (idx, runid) in tqdm(elem[1], desc="train(%d)" % (scaleidx)):
                source_time = 0
                if elem[0]:
                    source_time = self._train_metadata.loc[runid]["SourceTime"]
                else:
                    source_time = TTHRESH

                for j in [scaleidx]:  #range(len(TSCALE_LIST)):
                    tscale = TSCALE_LIST[j]
                    invtscale = 1 / tscale

                    g_dat = pd.read_csv(os.path.join(tpath, "%d.csv" % runid),
                                        header=None)
                    d0 = g_dat[0] * 1e-6
                    d1 = np.cumsum(d0)
                    d2 = g_dat[1]
                    bins = EBINS
                    ebins = np.linspace(0, MAX_ENERGY, bins + 1)
                    tmax = d1.values[-1]

                    tstep = TSTEP_PER_SCALE[j]
                    tcurr = source_time

                    # Count-rate histogram over the whole run; prominent
                    # peaks mark candidate source pass-by times.
                    timeHist = np.histogram(d1, bins=1024)[0]
                    timeHist = denoise_signal(timeHist)
                    peaks, _ = find_peaks(timeHist, prominence=5)
                    peaksS = peaks / 1024 * tmax   # bin index -> seconds

                    hist_list = []
                    tiarr = []
                    tscalearr = []

                    twin = TWIN_PER_SCALE[scaleidx]
                    twinoffs = int(twin / 2)

                    inp = []
                    toffs_arr = []
                    for tinc in range(twin):
                        ttoffs_s = (tinc - twinoffs) * tstep
                        assert (tcurr + ttoffs_s > TOFFS)
                        assert (tcurr + ttoffs_s + invtscale < tmax)
                        dind = np.argwhere((d1 > tcurr + ttoffs_s) & (
                            d1 < tcurr + ttoffs_s + invtscale)).flatten()
                        d3 = d2[dind]
                        hist = np.histogram(d3, bins=ebins)[0]

                        inp.append(hist)
                        toffs_arr.append(tcurr + ttoffs_s)

                    xrow = self._row2record(self.model_bgs, inp, toffs_arr,
                                            peaksS, tmax)

                    xlist.append(xrow)
                    ylist.append(elem[0])

        xlist = np.vstack(xlist)
        ylist = np.vstack(ylist)

        return (xlist, ylist)
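
# The peak-detection step above, isolated on synthetic data. denoise_signal
# is from the original repo; a moving average stands in for it here as an
# assumption. A burst of extra events at t = 300 s shows up as a prominent
# spike in the 1024-bin count-rate histogram:
import numpy as np
from scipy.signal import find_peaks

rng = np.random.default_rng(0)
times = np.concatenate([rng.uniform(0, 600, 50_000),   # steady background
                        rng.normal(300, 2, 3_000)])    # injected burst
times = np.sort(np.clip(times, 0, 600))

time_hist = np.histogram(times, bins=1024)[0].astype(float)
smooth = np.convolve(time_hist, np.ones(9) / 9, mode="same")  # stand-in denoiser
peaks, _ = find_peaks(smooth, prominence=5)
peaks_s = peaks / 1024 * times[-1]               # bin index -> seconds
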
    def predict(self, x, ids, export=False):
        ret = np.zeros((len(ids), 2))

        export_data = []

        for i in tqdm(range(len(ids))):
            id = ids[i]

            arr = []
            tiarr = []
            sourcearr = []
            tscalearr = []

            g_arr = []
            g_tiarr = []
            g_sourcearr = []
            g_tscalearr = []
            g_sres_bgs = []
            g_smooth_arr = []

            #for is_smooth in range(2):
            for is_smooth in range(1):
                for (j, tscale) in enumerate(TSCALE_LIST):
                    dat = np.abs(x[len(TSCALE_LIST) * i + j])
                    tmax = dat.shape[0]

                    if is_smooth:
                        dat = np.abs(denoise_signal(dat))

                    weigh = self.model_bg.transform(dat)

                    weigh_arr_s = []
                    for source in range(len(self.model_arr_bgs)):
                        weigh_arr_s.append(
                            self.model_arr_bgs[source].transform(dat))

                    for ti in range(int(30 * tscale), tmax):
                        fit_bg = np.dot(weigh[ti], self.comps_bg)
                        diff_fit_bg = fit_bg - dat[ti, :]

                        sres = []
                        sres_bg = []
                        sres_bgs = []
                        for source in range(len(self.model_arr_bgs)):
                            fit_bgs = np.dot(
                                weigh_arr_s[source][ti],
                                self.model_arr_bgs[source].components_)
                            diff_fit_bgs = fit_bgs - dat[ti, :]

                            norm_bg = self._calc_source_norm(
                                diff_fit_bg, source)
                            norm_bgs = self._calc_source_norm(
                                diff_fit_bgs, source)

                            sres.append(norm_bg / norm_bgs)
                            sres_bg.append(norm_bg)
                            sres_bgs.append(norm_bgs)

                        if sres:
                            sresi = np.argmax(sres)
                            coeff = SIGNAL_COEFF[sresi]
                            thresh = SIGNAL_THRESHOLD_ARR[sresi +
                                                          is_smooth * 6]
                            bgthresh = BG_THRESHOLD  #BG_THRESHOLD_ARR[sresi]
                            #if sres_bgs[sresi] > BG_THRESHOLD * coeff and sres[sresi] > SIGNAL_THRESHOLD * coeff:
                            if (sres_bgs[sresi] > bgthresh
                                    and sres[sresi] > thresh):
                                arr.append(sres[sresi])
                                tiarr.append(ti / tscale)
                                sourcearr.append(sresi)
                                tscalearr.append(tscale)

                            g_arr.append(sres[sresi])
                            g_tiarr.append(ti / tscale)
                            g_sourcearr.append(sresi)
                            g_tscalearr.append(tscale)
                            g_sres_bgs.append(sres_bgs[sresi])
                            g_smooth_arr.append(is_smooth)

            if arr:
                idx = np.argmax(arr)
                ti = tiarr[idx]
                si = sourcearr[idx]
                toffs = 1 / tscalearr[idx] * 0.5
                ret[i, 0] = 1 + si
                ret[i, 1] = ti + toffs

            export_data.append([
                g_arr, g_tiarr, g_sourcearr, g_tscalearr, g_sres_bgs,
                g_smooth_arr
            ])

        if export:
            tcache_path = os.path.join(self._base_path, "export.pkl")
            with open(tcache_path, "wb") as fd:
                pkl.dump(export_data, fd)

        return ret
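
# model_bg and model_arr_bgs behave like sklearn NMF models (transform() plus
# components_), so the background-fitting pattern used in predict() can be
# sketched as follows. background_spectra is an assumed (n_windows, EBINS)
# matrix of background-only energy histograms, synthetic here:
import numpy as np
from sklearn.decomposition import NMF

rng = np.random.default_rng(0)
background_spectra = rng.poisson(20.0, size=(512, 128)).astype(float)

model_bg = NMF(n_components=16, init="nndsvda", max_iter=500)
weigh = model_bg.fit_transform(background_spectra)   # per-window weights
comps_bg = model_bg.components_                      # spectral basis

# Reconstruct window ti from the background basis and form the residual
# that _calc_source_norm scores, mirroring the loop above.
ti = 0
fit_bg = np.dot(weigh[ti], comps_bg)
diff_fit_bg = fit_bg - background_spectra[ti, :]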