Example #1
def get_external_bci(conf=CONF, interval='jeffreys', verb=True):
    """Import external data sets and calculate BCI.
    Inputs:
        conf: confidence level, default 90%
        interval: binomial interval method passed to binom_conf_interval, default 'jeffreys'
        verb: if True, print BCIs as they are calculated
    Output:
        external_bci: DataFrame of binomial confidence intervals
    """

    # Import ASAS-SN and ZTF SNe
    asassn_det, asassn_all = count_asassn_sne()
    ztf_det, ztf_all = count_ztf_sne()

    # Calculate binomial confidence intervals for external data
    print('\nExternal measures of f_CSM:')
    asassn_bci = 100 * binom_conf_interval(
        asassn_det, asassn_all, confidence_level=conf, interval=interval)
    ztf_bci = 100 * binom_conf_interval(
        ztf_det, ztf_all, confidence_level=conf, interval=interval)

    if verb:
        print('ASAS-SN')
        print(asassn_bci)
        print('ZTF')
        print(ztf_bci)

    external_bci = pd.DataFrame([asassn_bci, ztf_bci],
                                index=['ASAS-SN', 'ZTF'],
                                columns=['bci_lower', 'bci_upper'])
    return external_bci
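For reference, a minimal standalone sketch of the astropy.stats.binom_conf_interval call these examples wrap (assuming a recent astropy where the keyword is confidence_level, as used above); the counts here are made up:

from astropy.stats import binom_conf_interval

# e.g. 3 detections in 460 trials, 90% Jeffreys interval
lower, upper = binom_conf_interval(3, 460, confidence_level=0.9, interval='jeffreys')
print(100 * lower, 100 * upper)  # limits are fractions; scale by 100 for percent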
Example #2
 def peaks_and_thresh(self):
     """Get an estimate of the peak positions and standard deviations given a set threshold
     Then set the threshold as 5 standard deviations above background
     returns:
     images processed, loading probability, error in loading probability, bg count, bg width, 
     signal count, signal width, separation, fidelity, error in fidelity, threshold"""
     # split histograms at threshold then get mean and stdev:
     ascend = np.sort(self.counts[:self.im_num])
     bg = ascend[ascend < self.thresh]  # background
     signal = ascend[ascend > self.thresh]  # signal above threshold
     bg_peak = np.mean(bg)
     bg_stdv = np.std(bg, ddof=1)
     at_peak = np.mean(signal)
     at_stdv = np.std(signal, ddof=1)
     sep = at_peak - bg_peak
     self.thresh = bg_peak + 5 * bg_stdv  # update threshold
     # atom is present if the counts are above threshold
     self.atom[:self.im_num] = self.counts[:self.im_num] // self.thresh
     atom_count = np.size(
         np.where(self.atom > 0)[0])  # images with counts above threshold
     empty_count = np.size(np.where(self.atom[:self.im_num] == 0)[0])
     load_prob = np.around(atom_count / self.im_num, 4)
     conf = binom_conf_interval(atom_count,
                                atom_count + empty_count,
                                interval='jeffreys')
     uplperr = conf[1] - load_prob  # 1 sigma confidence above mean
     lolperr = load_prob - conf[0]  # 1 sigma confidence below mean
     load_err = np.mean([uplperr, lolperr])
     self.fidelity, self.err_fidelity = np.around(self.get_fidelity(), 4)
     return np.array([self.im_num, load_prob, load_err, bg_peak, bg_stdv,
                      at_peak, at_stdv, sep, self.fidelity,
                      self.err_fidelity, self.thresh])
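The 1-sigma error-bar pattern above (also used in Example #4) reduces to the following sketch; the counts are hypothetical:

from astropy.stats import binom_conf_interval

k, n = 42, 100                   # hypothetical successes and trials
p = k / n
lo, hi = binom_conf_interval(k, n, interval='jeffreys')  # astropy's default level is ~68%, i.e. 1 sigma
up_err, lo_err = hi - p, p - lo  # asymmetric errors about the point estimate
print(p, lo_err, up_err)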
Example #3
def plot_effective_area(df_cuts, mc_spectrum, out_path):
    bins, bin_centers, bin_widths = make_default_cta_binning(
        bins_per_decade=15)
    hist_mc = mc_spectrum.expected_events_for_bins(energy_bins=bins)
    hist_df, _ = np.histogram(df_cuts.mc_energy.values, bins=bins)

    invalid = hist_df > hist_mc
    hist_df[invalid] = hist_mc[invalid]

    lower_conf, upper_conf = binom_conf_interval(hist_df, hist_mc, 0.95)
    gen_area = mc_spectrum.generation_area
    lower_conf = lower_conf * gen_area
    upper_conf = upper_conf * gen_area
    area = (hist_df / hist_mc) * gen_area
    lower_error = area - lower_conf
    upper_error = upper_conf - area
    mask = area > 0

    fig, ax = plt.subplots(1, 1, figsize=figsize)
    plt.errorbar(bin_centers.value[mask],
                 area.value[mask],
                 xerr=bin_widths.value[mask] / 2.0,
                 yerr=[lower_error.value[mask], upper_error.value[mask]],
                 linestyle="")

    reference = True
    if reference:
        from cta_plots.sensitivity import load_effective_area_reference
        df = load_effective_area_reference()
        plt.plot(df.energy, df.effective_area, '--', label='Reference')
    # TODO: polish the plot appearance and add a legend
    ax.set_xscale('log')
    ax.set_yscale('log')
    fig.savefig(out_path)
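The efficiency-times-area pattern in this and the collection-area examples below boils down to the sketch here; the bin counts and generation area are invented for illustration:

import numpy as np
from astropy.stats import binom_conf_interval

n_sim = np.array([1000, 800, 600])    # simulated events per energy bin (hypothetical)
n_sel = np.array([120, 300, 420])     # selected events per bin (hypothetical)
gen_area = 1e6                        # generation area in m^2 (hypothetical)

lower, upper = binom_conf_interval(n_sel, n_sim)            # 1-sigma efficiency bounds
area = n_sel / n_sim * gen_area                             # effective area per bin
yerr = [area - lower * gen_area, upper * gen_area - area]   # relative offsets for plt.errorbar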
Example #4
 def conf(self, success, total):
     """Return the Binomial confidence at 1 sigma"""
     try:
         sp = success / total
         conf = binom_conf_interval(success, total, interval='jeffreys')
         uperr = conf[1] - sp # 1 sigma confidence above mean
         loerr = sp - conf[0] # 1 sigma confidence below mean
         return sp, uperr, loerr, 0.5*(uperr+loerr)
     except ValueError:
         return 0, 0, 0, 0
Example #5
def collection_area(
    all_events,
    selected_events,
    impact,
    bins,
    sample_fraction=1.0,
    smoothing=0,
):
    '''
    Calculate the collection area for the given events.

    Parameters
    ----------
    all_events: array-like
        Quantity which should be histogrammed for all simulated events
    selected_events: array-like
        Quantity which should be histogrammed for all selected events
    bins: int or array-like
        either number of bins or bin edges for the histogram
    impact: astropy Quantity of type length
        The maximal simulated impact parameter
    sample_fraction: float
        The fraction of `all_events` that was analysed
        to create `selected_events`
    smoothing: float
        The amount of smoothing to apply to the resulting matrix
    '''

    hist_all, hist_selected, bin_edges = histograms(
        all_events,
        selected_events,
        bins,
    )

    hist_selected = (hist_selected / sample_fraction).astype(int)

    bin_width = np.diff(bin_edges)
    bin_center = 0.5 * (bin_edges[:-1] + bin_edges[1:])

    invalid = hist_selected > hist_all
    hist_selected[invalid] = hist_all[invalid]
    # use astropy to compute errors on that stuff
    lower_conf, upper_conf = binom_conf_interval(hist_selected, hist_all)

    # scale confidences to match and split
    lower_conf = lower_conf * np.pi * impact**2
    upper_conf = upper_conf * np.pi * impact**2

    area = (hist_selected / hist_all) * np.pi * impact**2

    if smoothing > 0:
        area = gaussian_filter(area.value, sigma=smoothing) * area.unit

    return area, bin_center, bin_width, lower_conf, upper_conf
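A hypothetical call of collection_area, assuming the histograms helper from the same module is importable and that the events are plain energy arrays:

import numpy as np
import astropy.units as u

all_energies = np.random.lognormal(mean=0.0, sigma=1.0, size=10000)       # simulated energies (made up)
selected_energies = np.random.choice(all_energies, 2500, replace=False)   # events surviving cuts

area, bin_center, bin_width, lower_conf, upper_conf = collection_area(
    all_energies, selected_energies, impact=270 * u.m, bins=20)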
Example #6
def main(tstart, scale, model='Chev94', sigma=3, iterations=ITER, conf=CONF):
    """Print binomial confidence interval for CSM interaction rate within
    given parameter bounds.
    Inputs:
        tstart: tuple, CSM model interaction start time bounds
        scale: tuple, CSM model scale factor bounds
        model: 'Chev94' or 'flat', spectral model
    """

    # Initialize DataFrame
    rate_df = pd.DataFrame(
        [],
        index=['GALEX', 'G19', 'All UV'],
        columns=['Detections', 'Trials', 'Lower Limit [%]', 'Upper Limit [%]'])

    # Get save directories
    galex_save_dir = run_dir('galex', model, sigma, detections=False)
    graham_save_dir = run_dir('Graham', model, sigma, detections=False)
    graham_det_dir = run_dir('Graham', model, sigma, detections=True)

    # Successes and trials
    rate_df.loc['GALEX',
                'Trials'] = count_recovered_sne(galex_save_dir, tstart, scale,
                                                iterations)
    rate_df.loc['GALEX', 'Detections'] = 0
    graham_detections = count_recovered_sne(graham_det_dir, tstart, scale,
                                            iterations)
    graham_nondetections = count_recovered_sne(graham_save_dir, tstart, scale,
                                               iterations)
    rate_df.loc['G19', 'Detections'] = graham_detections
    rate_df.loc['G19', 'Trials'] = graham_detections + graham_nondetections
    rate_df.loc['All UV'] = np.sum(rate_df.loc[['GALEX', 'G19']])

    # Calculate binomial confidence interval
    # bci = 100 * binom_conf_interval(rate_df['Detections'], rate_df['Trials'],
    #         confidence_level=conf, interval='jeffreys')
    for study in rate_df.index:
        detections = rate_df.loc[study, 'Detections']
        trials = rate_df.loc[study, 'Trials']
        if trials >= 1:
            bci = 100 * binom_conf_interval(
                detections, trials, confidence_level=conf, interval='jeffreys')
            rate_df.loc[study, ['Lower Limit [%]', 'Upper Limit [%]']] = bci.T
        else:
            rate_df.loc[study, 'Lower Limit [%]'] = np.nan
            rate_df.loc[study, 'Upper Limit [%]'] = np.nan
    # bci_lower, bci_upper = bci_nan(rate_df[['Detections']], rate_df[['Trials']])
    # rate_df['Lower Limit [%]'] = bci_lower
    # rate_df['Upper Limit [%]'] = bci_upper

    print('\nConfidence intervals for %s < tstart < %s, ' % tstart +
          '%s < S < %s using the %s model' % (scale + (model, )))
    print(rate_df)
Example #7
def collection_area(
    all_events,
    selected_events,
    impact,
    bins,
    range=None,
    log=True,
    sample_fraction=1.0,
):
    '''
    Calculate the collection area for the given events.
    Parameters
    ----------
    all_events: array-like
        Quantity which should be histogrammed for all simulated events
    selected_events: array-like
        Quantity which should be histogrammed for all selected events
    bins: int or array-like
        either number of bins or bin edges for the histogram
    impact: astropy Quantity of type length
        The maximal simulated impact parameter
    log: bool
        flag indicating whether log10 should be applied to the quantity.
    sample_fraction: float
        The fraction of `all_events` that was analysed
        to create `selected_events`
    '''

    hist_all, hist_selected, bin_edges = histograms(all_events,
                                                    selected_events,
                                                    bins,
                                                    range=range,
                                                    log=log)

    hist_selected = (hist_selected / sample_fraction).astype(int)

    bin_width = np.diff(bin_edges)
    bin_center = 0.5 * (bin_edges[:-1] + bin_edges[1:])

    invalid = hist_selected > hist_all
    hist_selected[invalid] = hist_all[invalid]
    # use astropy to compute errors on that stuff
    lower_conf, upper_conf = binom_conf_interval(hist_selected, hist_all)

    # scale confidences to match and split
    lower_conf = lower_conf * np.pi * impact**2
    upper_conf = upper_conf * np.pi * impact**2

    area = hist_selected / hist_all * np.pi * impact**2

    return area, bin_center, bin_width, lower_conf, upper_conf
Example #8
def bci_nan(detections, trials, conf=0.9, interval='jeffreys'):
    """Find binomial confidence interval for DataFrame with NaN values.
    Inputs:
        detections: DataFrame of detections
        trials: DataFrame of trials (incl. detections), same shape as detections
    Returns:
        bci_lower: DataFrame, same shape as trials, with lower BCI limits
        bci_upper: DataFrame, same shape as trials, with upper BCI limits
    """

    from astropy.stats import binom_conf_interval

    if detections.shape != trials.shape:
        raise ValueError('detections and trials must have the same shape.')

    bci_lower = pd.DataFrame([], index=trials.index)
    bci_upper = pd.DataFrame([], index=trials.index)

    # Calculate binomial confidence intervals
    for col in trials.columns:
        # separate bins with no trials
        pos_index = trials[trials[col] >= 1].index
        zero_index = trials[trials[col] < 1].index

        # calculate BCI for cells with positive trials
        bci = binom_conf_interval(detections.loc[pos_index, col],
                                  trials.loc[pos_index, col],
                                  confidence_level=conf,
                                  interval=interval)

        # add to dataframes
        bci_lower.loc[pos_index, col] = bci[0].T
        bci_upper.loc[pos_index, col] = bci[1].T

        # in cases with no trials, lower limit 0. and upper limit 1.
        bci_lower.loc[zero_index, col] = 0.
        bci_upper.loc[zero_index, col] = 1.

    return bci_lower, bci_upper
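A toy usage of bci_nan, assuming pandas and numpy are imported and the function above is in scope; rows whose trial counts are NaN simply stay NaN in the output:

import numpy as np
import pandas as pd

detections = pd.DataFrame({'bin1': [1, 0], 'bin2': [0, np.nan]}, index=['A', 'B'])
trials = pd.DataFrame({'bin1': [10, 0], 'bin2': [5, np.nan]}, index=['A', 'B'])
bci_lower, bci_upper = bci_nan(detections, trials, conf=0.9)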
Example #9
def main():
    parser = argparse.ArgumentParser(description='Plot detection limits.')
    parser.add_argument('-o',
                        '--overwrite',
                        action='store_true',
                        help='re-concatenate detection and nondetection data.')
    parser.add_argument('-s',
                        '--systematics',
                        action='store_true',
                        help='plot observation and sample systematics.')
    parser.add_argument('--show',
                        action='store_true',
                        help='show plot after saving')
    parser.add_argument('--presentation',
                        action='store_true',
                        help='configure plots for presentation')
    args = parser.parse_args()

    sn_info = pd.read_csv(Path('ref/sn_info.csv'), index_col='name')
    conf_det = pd.read_csv(Path('out/confirmed_detections.csv'))
    det_sne = list(zip(conf_det['Name'], conf_det['Band']))

    if args.overwrite or not Path('out/nondetections.csv').is_file():
        print('Separating detections from nondetections...')
        detections = aggregate_detections(det_sne, sn_info)
        nondetections = aggregate_nondetections(det_sne, sn_info)
    else:
        detections = pd.read_csv(Path('out/detections.csv'))
        nondetections = pd.read_csv(Path('out/nondetections.csv'))

    if args.systematics:
        print('Plotting systematics...')
        # Look for systematics in observations
        all_detections = nondetections.append(detections)
        all_detections.set_index('name', inplace=True)
        plot_observation_systematics(all_detections, sn_info)
        plot_sample_systematics(sn_info)

    print('Plotting detections & limits...')

    fig, ax = plt.subplots()
    fig.set_tight_layout(True)

    ebar_alpha = 0.8
    limit_alpha = 0.6
    nondet_alpha = 0.05
    faint_alpha = 0.3
    upper_lim = 1e28
    lower_lim = 1e22 if args.presentation else None
    cutoff = 10**25.88  # Graham 2015cp detection
    det_ms = 6  # detection marker size

    markers = ['o', 's', 'p', 'd', 'P']
    colors = ['cyan', 'orange', 'green', 'magenta']

    # Plot Swift SN2011fe from Brown+ 2012
    if args.presentation:
        band = 'UVW1'
    else:
        band = 'UVM2'
    SN2011fe = pd.read_csv(Path('external/SN2011fe_Brown2012.tsv'),
                           sep='\t',
                           comment='#',
                           skiprows=[45, 46])
    SN2011fe = SN2011fe[pd.notna(SN2011fe['mag'])]
    SN2011fe['t_delta'] = SN2011fe['MJD'] - Time('2011-08-24',
                                                 format='iso').mjd
    lc = SN2011fe[SN2011fe['Filt'] == band.lower()].copy()
    dist = 6.4  # Mpc; from Shappee & Stanek 2011
    z = 0  # too close to need correction
    a_v = 0  # won't worry about it right now
    a_band = 'NUV'  # close enough
    lc['FluxDensity'], lc['e_FluxDensity'] = swift_cps2flux(
        lc['CRate'], lc['e_CRate'], band)
    lc['Luminosity'] = flux2luminosity(lc['FluxDensity'], dist, z, a_v, a_band)
    lc['Luminosity_hz'] = wavelength2freq(lc['Luminosity'], 2245.8)
    ax.plot(lc['t_delta'],
            lc['Luminosity_hz'],
            color='brown',
            label='SN2011fe (%s)' % band,
            zorder=1)

    # Plot near-peak and CSM detections
    for i, (sn, band) in enumerate(det_sne):
        lc = detections[(detections['name'] == sn)
                        & (detections['band'] == band)]
        lc_det = lc[lc['sigma'] > DET_SIGMA]
        lc_non = lc[lc['sigma'] <= DET_SIGMA]
        if args.presentation:
            # Plot nondetection limits of near-peak SNe
            plot_luminosity_limit(ax,
                                  lc_non,
                                  s=36,
                                  c=COLORS[band],
                                  a=faint_alpha,
                                  e='none',
                                  z=2)
        else:
            ax.errorbar(lc_det['t_delta_rest'],
                        lc_det['luminosity_hostsub_hz'],
                        yerr=lc_det['luminosity_hostsub_err_hz'],
                        linestyle='none',
                        label='%s (%s)' % (sn, band),
                        marker=markers[i],
                        ms=det_ms,
                        markeredgecolor='k',
                        color=colors[i],
                        ecolor='k',
                        elinewidth=1,
                        zorder=9)
            # Plot nondetection limits of near-peak SNe
            plot_luminosity_limit(ax,
                                  lc_non,
                                  s=det_ms**2,
                                  c=colors[i],
                                  a=limit_alpha,
                                  e='k',
                                  z=8)

    # Plot nondetections
    for band in ['FUV', 'NUV']:
        lc = nondetections[nondetections['band'] == band]
        # Make distant (bright) limits smaller
        bright = lc[LIMIT_SIGMA * lc['luminosity_hostsub_err_hz'] >= cutoff]
        plot_luminosity_limit(ax,
                              bright,
                              s=16,
                              c=COLORS[band],
                              a=nondet_alpha,
                              e='none',
                              z=2)
        # Make close (faint) limits bigger
        faint = lc[LIMIT_SIGMA * lc['luminosity_hostsub_err_hz'] < cutoff]
        plot_luminosity_limit(ax,
                              faint,
                              s=36,
                              c=COLORS[band],
                              a=faint_alpha,
                              e='none',
                              z=3)

    # Plot Graham detections
    # note: Graham uses days past explosion, not discovery
    if args.presentation:
        ax.axhline(y=cutoff, color='r', label='SN2015cp (F275W)', zorder=10)
    else:
        ax.scatter(686,
                   10**25.88,
                   marker='*',
                   s=100,
                   color='r',
                   edgecolors='k',
                   label='SN2015cp (F275W)',
                   zorder=10)
        ax.scatter(477,
                   10**26.06,
                   marker='X',
                   s=64,
                   color='y',
                   edgecolors='k',
                   label='ASASSN-15og (F275W)',
                   zorder=10)

    ax.set_xlabel('Rest frame time since discovery [days]')
    # ax.set_xlabel('Observed time since discovery [days]')
    ax.set_xlim((-50, np.max(faint['t_delta_rest']) + 50))
    ax.set_ylabel('Luminosity [erg s$^{-1}$ Hz$^{-1}$]')
    ax.set_yscale('log')
    ax.set_ylim((lower_lim, upper_lim))

    # Legend
    handles, labels = ax.get_legend_handles_labels()
    legend_elements = [
        Line2D([0], [0],
               marker='v',
               markerfacecolor=COLORS['FUV'],
               markeredgecolor='none',
               markersize=6,
               alpha=faint_alpha,
               label='detection limit (FUV)',
               lw=0),
        Line2D([0], [0],
               marker='v',
               markerfacecolor=COLORS['NUV'],
               markeredgecolor='none',
               markersize=6,
               alpha=faint_alpha,
               label='detection limit (NUV)',
               lw=0)
    ]
    ncol = 2 if args.presentation else 3
    plt.legend(handles=handles + legend_elements,
               loc='upper right',
               ncol=ncol,
               handletextpad=0.2,
               handlelength=1.0)

    plt.savefig(Path('figs/limits.png'), dpi=300)
    if args.show:
        plt.show()
    else:
        plt.close()

    # Binomial statistics plot
    fig, ax = plt.subplots()

    conf_level = 0.9
    # Include all nondetections below the luminosity of 2015cp
    below_graham = nondetections[nondetections['luminosity_hostsub_err_hz'] *
                                 LIMIT_SIGMA < cutoff]
    # Also include limits from near-peak SNe
    below_graham = below_graham.append(
        lc_non[lc_non['luminosity_hostsub_err_hz'] * LIMIT_SIGMA < cutoff])
    # Only those after discovery
    below_graham = below_graham[below_graham['t_delta_rest'] > 0]
    print('Number of SNe with limits fainter than 2015cp: %s' %
          len(below_graham.drop_duplicates('name').index))
    print('Number of observations with limits fainter than 2015cp: %s' %
          len(below_graham.index))
    bins = [0, 100, 500, 2500]
    k = []
    n = []
    labels = []
    for i in range(len(bins) - 1):
        limits = below_graham[(below_graham['t_delta_rest'] >= bins[i])
                              & (below_graham['t_delta_rest'] < bins[i + 1])]
        discrete_sne = limits.drop_duplicates('name')
        k.append(0)
        n.append(len(discrete_sne.index))
        labels.append('%s - %s' % (bins[i], bins[i + 1]))
    print(bins)
    print(n)
    bci = 100 * binom_conf_interval(
        k, n, confidence_level=conf_level, interval='jeffreys')
    print(bci)
    midpoint = np.mean(bci, axis=0)
    x_pos = np.arange(len(bins) - 1)
    ax.errorbar(x_pos,
                midpoint,
                yerr=np.abs(bci - midpoint),
                capsize=10,
                marker='o',
                linestyle='none',
                ms=10,
                mec='r',
                c='r',
                mfc='w',
                label='This study')

    # Confidence interval from Yao 2019
    ztf_bci = 100 * binom_conf_interval(
        1, 127, confidence_level=conf_level, interval='jeffreys')
    print(ztf_bci)
    ztf_mean = np.mean(ztf_bci)
    ax.errorbar([0.1], [ztf_mean],
                yerr=([ztf_mean - ztf_bci[0]], [ztf_bci[1] - ztf_mean]),
                marker='o',
                c='b',
                linestyle='none',
                ms=10,
                capsize=10,
                mec='b',
                mfc='w',
                label='ZTF')

    # ASAS-SN interval
    asassn_bci = 100 * binom_conf_interval(
        3, 460, confidence_level=conf_level, interval='jeffreys')
    print(asassn_bci)
    asassn_mean = np.mean(asassn_bci)
    ax.errorbar([0.2], [asassn_mean],
                yerr=([asassn_mean - asassn_bci[0]],
                      [asassn_bci[1] - asassn_mean]),
                marker='o',
                c='orange',
                linestyle='none',
                ms=10,
                capsize=10,
                mec='orange',
                mfc='w',
                label='ASAS-SN')

    # Confidence interval & assumed late-onset rate from Graham 2019
    graham_rate = 6
    graham_bci = 100 * binom_conf_interval(
        1, 64, confidence_level=conf_level, interval='jeffreys')
    print(graham_bci)
    ax.errorbar([2.1], [graham_rate],
                yerr=([graham_rate - graham_bci[0]],
                      [graham_bci[1] - graham_rate]),
                marker='v',
                color='g',
                linestyle='none',
                ms=15,
                capsize=10,
                label='G19')
    # ax.annotate('G19', (2.1, graham_rate), textcoords='offset points',
    #         xytext=(10, 0), ha='left', va='center', size=18, color='g')

    ax.set_xlim((x_pos[0] - 0.5, x_pos[-1] + 0.5))
    ax.set_xticks(x_pos)
    ax.set_xticklabels(labels)
    ax.tick_params(axis='x', which='minor', bottom=False, top=False)
    ax.set_xlabel('Rest frame time since discovery [days]')
    ax.set_ylabel('Rate of CSM interaction [%]')

    # Preliminary!
    if args.presentation:
        fig.text(0.95,
                 0.05,
                 'PRELIMINARY',
                 fontsize=72,
                 color='gray',
                 rotation='30',
                 ha='right',
                 va='bottom',
                 alpha=0.5)

    plt.tight_layout()
    plt.legend()
    plt.savefig(Path('figs/rates.png'), dpi=300)
    if args.show:
        plt.show()
    else:
        plt.close()
Example #10
                                     bins=6,
                                     range=[r1, r2])
        vol_table1['env_bins'] = np.digitize(vol_table1['logSurfaceDensity'],
                                             bins1)
        grouped1 = vol_table1.group_by('env_bins')
        means = grouped1.groups.aggregate(np.mean)

        #creating an array to find number of data points in each environmental bin
        n = []
        # this doesn't work properly; n ends up with different dimensions from p
        for y in range(1, len(bins1)):
            lens1 = vol_table1['env_bins'] == y
            yy = grouped1[lens1]
            yy = len(yy['env_bins'])
            n = n + [yy]

        #binomial errors
        p = means['spiral_spiral_deb_frac']
        k = n * p
        a, b = binom_conf_interval(k, n)
        a = p - a
        b = b - p
        error = [a, b]
        #unsure what to set the upper and lower limits as
        ax.errorbar(means['logSurfaceDensity'],
                    means['spiral_spiral_deb_frac'],
                    error,
                    fmt='.-',
                    label=str(x))
        legend = ax.legend(loc='upper right', shadow=True, prop={'size': 6})
jhuandportdwarfs = list(
    set(list(jhu[jhudwarf].index)) & set(list(port[portdwarf].index)))
jhuandnsadwarfs = list(
    set(list(jhu[jhudwarf].index)) & set(list(nsa[nsadwarf].index)))
nsaandportdwarfs = list(
    set(list(nsa[nsadwarf].index)) & set(list(port[portdwarf].index)))
print(len(jhuandportdwarfs), len(jhuandnsadwarfs), len(nsaandportdwarfs))

jhuandportdwarfagn = list(
    set(list(jhu[jhudwarfagn].index)) & set(list(port[portdwarfagn].index)))
jhuandnsadwarfagn = list(
    set(list(jhu[jhudwarfagn].index)) & set(list(nsa[nsadwarfagn].index)))
nsaandportdwarfagn = list(
    set(list(nsa[nsadwarfagn].index)) & set(list(port[portdwarfagn].index)))
print(len(jhuandportdwarfagn), len(jhuandnsadwarfagn), len(nsaandportdwarfagn))

print (100.0*len(jhuandportdwarfagn)/len(jhuandportdwarfs), \
       100.0*binom_conf_interval(len(jhuandportdwarfagn),len(jhuandportdwarfs)) -\
       100.0*len(jhuandportdwarfagn)/len(jhuandportdwarfs))
print (100.0*len(jhuandnsadwarfagn)/len(jhuandnsadwarfs),\
       100.0*binom_conf_interval(len(jhuandnsadwarfagn),len(jhuandnsadwarfs)) -\
       100.0*len(jhuandnsadwarfagn)/len(jhuandnsadwarfs))
print (100.0*len(nsaandportdwarfagn)/len(nsaandportdwarfs),\
       100.0*binom_conf_interval(len(nsaandportdwarfagn),len(nsaandportdwarfs))-\
       100.0*len(nsaandportdwarfagn)/len(nsaandportdwarfs))

print('JHU or Port', 'JHU or NSA', 'NSA or Port')
jhuorport = list(set(list(jhu.index)) | set(list(port.index)))
jhuornsa = list(set(list(jhu.index)) | set(list(nsa.index)))
nsaorport = list(set(list(nsa.index)) | set(list(port.index)))
print(len(jhuorport), len(jhuornsa), len(nsaorport))

jhuorportdwarfs = list(
Example #12
def main(input_file, output, n_bins, cuts_path, reference):

    bins, bin_center, bin_widths = make_energy_bins(e_min=0.008 * u.TeV,
                                                    e_max=200 * u.TeV,
                                                    bins=n_bins)

    gammas, _, _ = load_signal_events(input_file, columns=cols)

    if cuts_path:
        gammas = apply_cuts(gammas, cuts_path, theta_cuts=True, sigma=0)

    runs = read_data(input_file, key='runs')
    mc_production = MCSpectrum.from_cta_runs(runs)

    gammas_energy = gammas.gamma_energy_prediction_mean.values

    hist_all = mc_production.expected_events_for_bins(energy_bins=bins)
    hist_selected, _ = np.histogram(gammas_energy, bins=bins)

    invalid = hist_selected > hist_all
    hist_selected[invalid] = hist_all[invalid]
    # use astropy to compute errors on that stuff
    lower_conf, upper_conf = binom_conf_interval(hist_selected, hist_all)

    # scale confidences to match and split
    lower_conf = lower_conf * mc_production.generation_area
    upper_conf = upper_conf * mc_production.generation_area

    area = (hist_selected / hist_all) * mc_production.generation_area

    # matplotlib wants relative offsets for errors. the conf values are absolute.
    lower = area - lower_conf
    upper = upper_conf - area

    mask = area > 0
    plt.errorbar(
        bin_center.value[mask],
        area.value[mask],
        xerr=bin_widths.value[mask] / 2.0,
        yerr=[lower.value[mask], upper.value[mask]],
        linestyle='',
        color=main_color,
    )

    if reference:
        df = load_effective_area_requirement()
        plt.plot(df.energy,
                 df.effective_area,
                 '--',
                 color='gray',
                 label='Prod3b reference')

    plt.legend()

    plt.ylim([100, 1E8])
    plt.xscale('log')
    plt.yscale('log')
    plt.xlabel(r'$E_{\mathrm{Reco}} /  \mathrm{TeV}$')
    plt.ylabel(r'$\mathrm{Mean\;Effective\;Area} \,/\, \mathrm{m}^2$')
    plt.tight_layout()
    if output:
        plt.savefig(output)
    else:
        plt.show()
Example #13
def save_results_good_data_nounique_model(test_dose_response, qc_flag,
                                          model_preds, selected_models,
                                          chemical_id, end_point):
    # Create the PdfPages object to which we will save the pages:
    # The with statement makes sure that the PdfPages object is closed properly at
    # the end of the block, even if an Exception occurs.

    # Estimate AUC and min and max doses
    if (not test_dose_response.empty):
        print("test_dose_response:" + str(test_dose_response))

        dose_response_auc = np.trapz(test_dose_response.num_affected /
                                     test_dose_response.total_num,
                                     x=test_dose_response.dose)
        dose_min = min(test_dose_response.dose)
        dose_max = max(test_dose_response.dose)
        dose_response_auc_norm = dose_response_auc / (dose_max - dose_min)
    else:
        dose_response_auc = np.nan
        dose_min = np.nan
        dose_max = np.nan
        dose_response_auc_norm = np.nan

    if (not isinstance(chemical_id, str)):
        chemical_id = str(chemical_id)

    filename = chemical_id + '_' + end_point + '.pdf'
    model_preds = model_preds.round(8)

    # Extract subset of results table
    model_preds_basic_stats = model_preds[[
        'Model', 'Chi-squared', 'p-val', 'AIC', 'BMD10', 'BMDL10'
    ]]

    residual_column_names = [('dose' + str(i))
                             for i in range(len(test_dose_response['dose']))]
    model_preds_residuals = pd.DataFrame(columns=['Model'] +
                                         residual_column_names)
    model_preds_residuals['Model'] = model_preds['Model']
    model_preds_residuals_matrix = np.empty(
        (model_preds['Scaled Residuals'].shape[0],
         len(test_dose_response['dose'])))
    model_preds_residuals_matrix[:] = np.nan

    model_preds_residuals[residual_column_names] = model_preds_residuals_matrix
    for model_pred_index in range(model_preds['Scaled Residuals'].shape[0]):
        if (not any(np.isnan(
                model_preds['Scaled Residuals'][model_pred_index]))):
            model_preds_residuals.iloc[model_pred_index, 1:] = np.matrix(
                model_preds['Scaled Residuals']
                [model_pred_index].tolist()).round(8)

    # Create dictionaries for various flags
    data_qc_flag_vals = {
        0:
        'Not enough dose groups for BMD analysis.' + '\n ' +
        'BMD analysis not performed.',
        1:
        'No trend detected in dose-response data.' + '\n' +
        'BMD analysis not performed.',
        2:
        'Dose-response data quality very good.',
        3:
        'Dose-response data quality good.',
        4:
        'Data resolution poor.' + '\n' + 'Caution advised.',
        5:
        'Negative correlation detected in dose-response data.' + '\n' +
        'Caution advised.'
    }

    bmd_analysis_flag_vals = {
        1:
        'Convergence not achieved for any dose-response model.',
        2:
        'Model fit might be unreliable.' + '\n' +
        'p-val for chi-squared statistic was < 0.1 for all converged models.',
        3:
        'A unique model could not be determined.' + '\n' +
        'Multiple models had the same AIC and BMD values but no valid BMDL values.',
        4:
        'Multiple models found.' + '\n' +
        'User advised to look at the results of analysis to choose the best model.'
    }

    #txt_for_model_selection = selected_models['model'].values + ' determined to be the best model'
    unique_model_flag_vals = {
        0: 'None',
        1: 'Best model could not be determined'
    }

    bmd_analysis_flag = selected_models['model_select_flag']
    unique_model_flag = selected_models['no_unique_model_found_flag']

    # Filenames for csv files containing the results of analysis
    bmd_vals_file_name = 'bmd_vals_' + str(time_now_date) + '.csv'
    dose_response_vals_file_name = 'dose_response_vals_' + str(
        time_now_date) + '.csv'
    fit_vals_file_name = 'fit_vals_' + str(time_now_date) + '.csv'

    # Generate text for report
    text_for_report = data_qc_flag_vals[qc_flag]

    # Specify reason for non-unique model
    text_for_report += '\n' + unique_model_flag_vals[unique_model_flag]
    text_for_report += '\n' + bmd_analysis_flag_vals[bmd_analysis_flag]

    with PdfPages(filename) as pdf:
        # Output data summary
        fig, ax = plt.subplots()
        # hide axes
        fig.patch.set_visible(False)

        ax.axis('off')
        ax.axis('tight')
        fig.text(0.1,
                 0.7,
                 ' '.join(map(str, text_for_report)),
                 transform=fig.transFigure,
                 size=10,
                 ha="left")
        plt.title('Summary of Analysis')
        pdf.savefig()
        plt.close()

        # Print Model Predictions
        fig, ax = plt.subplots()
        # hide axes
        fig.patch.set_visible(False)

        ax.axis('off')
        ax.axis('tight')
        ax.table(cellText=model_preds_basic_stats.values,
                 colLabels=model_preds_basic_stats.columns,
                 loc='center')

        plt.title('Model Predictions')
        fig.tight_layout()
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()

        # Print residuals for different models
        fig, ax = plt.subplots()
        # hide axes
        fig.patch.set_visible(False)
        ax.axis('off')
        ax.axis('tight')

        ax.table(cellText=model_preds_residuals.values,
                 colLabels=model_preds_residuals.columns,
                 loc='center')

        plt.title('Scaled Residuals')
        fig.tight_layout()
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()

        CI_bounds = np.zeros([2, len(test_dose_response.dose)])
        for index in range(len(test_dose_response.dose)):
            CI = astrostats.binom_conf_interval(
                test_dose_response.num_affected[index],
                test_dose_response.total_num[index],
                confidence_level=0.95)
            CI = np.abs(CI - test_dose_response.num_affected[index] /
                        test_dose_response.total_num[index])
            CI_bounds[0, index] = CI[0]
            CI_bounds[1, index] = CI[1]

        fig, ax = plt.subplots()
        ax.set_xscale("linear")
        ax.errorbar(test_dose_response.dose,
                    test_dose_response.num_affected /
                    test_dose_response.total_num,
                    CI_bounds,
                    marker='s',
                    mfc='red',
                    fmt='.')

        ax.set_xlabel('Dose')
        ax.set_ylabel('Fractional Response')
        ax.set_title('Dose-response Data')
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()

        # Create dataframes to append to the csv files
        bmd_vals = pd.DataFrame(columns=[
            'Chemical_ID', 'End_Point', 'Model', 'BMD10', 'BMDL', 'BMD50',
            'AUC', 'Min_Dose', 'Max_Dose', 'AUC_Norm', 'DataQC_Flag',
            'BMD_Analysis_Flag', 'BMD10_Flag', 'BMD50_Flag'
        ])
        dose_response_vals = pd.DataFrame(columns=[
            'Chemical_ID', 'End_Point', 'Dose', 'Response', 'CI_Lo', 'CI_Hi'
        ])
        fit_vals = pd.DataFrame(
            columns=['Chemical_ID', 'End_Point', 'X_vals', 'Y_vals'])

        # Populate dataframes
        bmd_vals['Chemical_ID'] = [chemical_id]
        bmd_vals['End_Point'] = [end_point]
        bmd_vals['Model'] = np.nan
        bmd_vals['BMD10'] = np.nan
        bmd_vals['BMDL'] = np.nan
        bmd_vals['BMD50'] = np.nan
        bmd_vals['DataQC_Flag'] = qc_flag
        bmd_vals['AUC'] = dose_response_auc
        bmd_vals['Min_Dose'] = dose_min
        bmd_vals['Max_Dose'] = dose_max
        bmd_vals['AUC_Norm'] = dose_response_auc_norm
        bmd_vals['BMD_Analysis_Flag'] = bmd_analysis_flag
        bmd_vals['BMD10_Flag'] = np.nan
        bmd_vals['BMD50_Flag'] = np.nan

        dose_response_vals['Chemical_ID'] = [chemical_id] * len(
            test_dose_response.dose)
        dose_response_vals['End_Point'] = [end_point] * len(
            test_dose_response.dose)
        dose_response_vals['Dose'] = test_dose_response.dose
        dose_response_vals[
            'Response'] = test_dose_response.num_affected / test_dose_response.total_num
        dose_response_vals['CI_Lo'] = CI_bounds[0, :]
        dose_response_vals['CI_Hi'] = CI_bounds[1, :]

        fit_vals['Chemical_ID'] = [chemical_id]
        fit_vals['End_Point'] = [end_point]
        fit_vals['X_vals'] = np.nan
        fit_vals['Y_vals'] = np.nan

        if not os.path.isfile(bmd_vals_file_name):
            bmd_vals.to_csv(bmd_vals_file_name,
                            header='column_names',
                            index=False,
                            na_rep='NULL')
        else:  # else it exists so append without writing the header
            bmd_vals.to_csv(bmd_vals_file_name,
                            mode='a',
                            header=False,
                            index=False,
                            na_rep='NULL')

        if not os.path.isfile(dose_response_vals_file_name):
            dose_response_vals.to_csv(dose_response_vals_file_name,
                                      header='column_names',
                                      index=False,
                                      na_rep='NULL')
        else:  # else it exists so append without writing the header
            dose_response_vals.to_csv(dose_response_vals_file_name,
                                      mode='a',
                                      header=False,
                                      index=False,
                                      na_rep='NULL')

        if not os.path.isfile(fit_vals_file_name):
            fit_vals.to_csv(fit_vals_file_name,
                            header='column_names',
                            index=False,
                            na_rep='NULL')
        else:  # else it exists so append without writing the header
            fit_vals.to_csv(fit_vals_file_name,
                            mode='a',
                            header=False,
                            index=False,
                            na_rep='NULL')

        # We can also set the file's metadata via the PdfPages object:
        d = pdf.infodict()
        d['Author'] = 'Paritosh Pande'
        d['CreationDate'] = datetime.datetime.today()
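The per-dose confidence-interval loop used in this and the next example can be condensed into a short sketch (counts hypothetical, mirroring the astrostats.binom_conf_interval calls above):

import numpy as np
from astropy.stats import binom_conf_interval

num_affected = np.array([0, 2, 5, 9])
total_num = np.array([10, 10, 10, 10])
frac = num_affected / total_num

ci = binom_conf_interval(num_affected, total_num, confidence_level=0.95)
ci_bounds = np.abs(ci - frac)   # distances from the fraction, usable as yerr in plt.errorbar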
Example #14
def save_results_good_data_unique_model(test_dose_response, qc_flag,
                                        model_preds, selected_models,
                                        chemical_id, end_point):
    # Create the PdfPages object to which we will save the pages:
    # The with statement makes sure that the PdfPages object is closed properly at
    # the end of the block, even if an Exception occurs.

    # Estimate AUC and min and max doses
    if (not test_dose_response.empty):
        dose_response_auc = np.trapz(test_dose_response.num_affected /
                                     test_dose_response.total_num,
                                     x=test_dose_response.dose)
        dose_min = min(test_dose_response.dose)
        dose_max = max(test_dose_response.dose)
        dose_response_auc_norm = dose_response_auc / (dose_max - dose_min)
    else:
        dose_response_auc = np.nan
        dose_min = np.nan
        dose_max = np.nan
        dose_response_auc_norm = np.nan

    if (not isinstance(chemical_id, str)):
        chemical_id = str(chemical_id)

    filename = chemical_id + '_' + end_point + '.pdf'
    model_preds = model_preds.round(8)

    # Extract subset of results table
    model_preds_basic_stats = model_preds[[
        'Model', 'Chi-squared', 'p-val', 'AIC', 'BMD10', 'BMDL10'
    ]]
    residual_column_names = [('dose' + str(i))
                             for i in range(len(test_dose_response['dose']))]
    model_preds_residuals = pd.DataFrame(columns=['Model'] +
                                         residual_column_names)
    model_preds_residuals['Model'] = model_preds['Model']
    model_preds_residuals_matrix = np.empty(
        (model_preds['Scaled Residuals'].shape[0],
         len(test_dose_response['dose'])))
    model_preds_residuals_matrix[:] = np.nan
    model_preds_residuals[residual_column_names] = model_preds_residuals_matrix

    for model_pred_index in range(model_preds['Scaled Residuals'].shape[0]):

        if (not any(np.isnan(
                model_preds['Scaled Residuals'][model_pred_index]))):

            if (report):
                print(f"model_preds_residuals:\n{model_preds_residuals}")

                print(f"model_pred_index:\n{model_pred_index}")  #0

                print(
                    f"model_preds['Scaled Residuals'][model_pred_index]:\n{model_preds['Scaled Residuals'][model_pred_index]}"
                )
                #[-0.71275987 -0.04841195  1.2423122   0.22264199  0.0676003  -0.32941879 -1.42086953  1.03044301]

                print(
                    f"type(model_preds['Scaled Residuals'][model_pred_index]):\n{type(model_preds['Scaled Residuals'][model_pred_index])}"
                )
                #<class 'numpy.ndarray'>

                print(
                    f"type(model_preds['Scaled Residuals'][model_pred_index].tolist()):\n{type(model_preds['Scaled Residuals'][model_pred_index].tolist())}"
                )
                #<class 'list'>

                print(
                    f"model_preds['Scaled Residuals'][model_pred_index].tolist():\n{model_preds['Scaled Residuals'][model_pred_index].tolist()}"
                )
                #[-0.712759872754371, -0.048411946176013756, 1.2423122026919409, 0.22264198996743165, 0.06760030169949577, -0.3294187919305739, -1.4208695346666183, 1.0304430051297178]

                print(
                    f"np.matrix(model_preds['Scaled Residuals'][model_pred_index].tolist()).round(8):\n{np.matrix(model_preds['Scaled Residuals'][model_pred_index].tolist()).round(8)}"
                )

            model_preds_residuals.iloc[model_pred_index, 1:] = np.matrix(
                model_preds['Scaled Residuals']
                [model_pred_index].tolist()).round(8)

    # Create dictionaries for various flags
    data_qc_flag_vals = {
        0:
        'Not enough dose groups for BMD analysis.' + '\n ' +
        'BMD analysis not performed.',
        1:
        'No trend detected in dose-response data.' + '\n' +
        'BMD analysis not performed.',
        2:
        'Dose-response data quality very good.',
        3:
        'Dose-response data quality good.',
        4:
        'Data resolution poor. Caution advised.',
        5:
        'Negative correlation detected in dose-response data.' + '\n' +
        'Caution advised.'
    }

    bmd_analysis_flag_vals = {
        1:
        'Convergence not achieved for any dose-response model.',
        2:
        'Model fit might be unreliable.' + '\n' +
        'p-val for chi-squared statistic was < 0.1 for all converged models.',
        3:
        'A unique model could not be determined.' + '\n' +
        'Multiple models had the same AIC and BMD values but no valid BMDL values.',
        4:
        'Multiple models found.' + '\n' +
        'User advised to look at the results of analysis to choose the best model.'
    }

    txt_for_model_selection = 'Best model found:' + selected_models[
        'model'].values
    unique_model_flag_vals = {
        0: txt_for_model_selection,
        1: 'Best model could not be determined'
    }

    bmd_analysis_flag = selected_models['model_select_flag']
    unique_model_flag = selected_models['no_unique_model_found_flag']

    # Filenames for csv files containing the results of analysis
    bmd_vals_file_name = 'bmd_vals_' + str(time_now_date) + '.csv'
    dose_response_vals_file_name = 'dose_response_vals_' + str(
        time_now_date) + '.csv'
    fit_vals_file_name = 'fit_vals_' + str(time_now_date) + '.csv'

    text_for_report = data_qc_flag_vals[qc_flag]

    # Generate text for report
    if ((unique_model_flag == 0) and (bmd_analysis_flag != 2)):
        text_for_report = text_for_report + '\n' + unique_model_flag_vals[
            unique_model_flag]
    elif ((unique_model_flag == 0) and (bmd_analysis_flag == 2)):
        text_for_report = text_for_report + '\n' + unique_model_flag_vals[
            unique_model_flag] + '\n' + bmd_analysis_flag_vals[
                bmd_analysis_flag]
    else:
        # Specify reason for non-uniqueness
        text_for_report = text_for_report + '\n' + unique_model_flag_vals[
            unique_model_flag] + '\n' + bmd_analysis_flag_vals[
                bmd_analysis_flag]

    with PdfPages(filename) as pdf:
        # Output data summary
        fig, ax = plt.subplots()
        # hide axes
        fig.patch.set_visible(False)

        ax.axis('off')
        ax.axis('tight')
        fig.text(0.1,
                 0.7,
                 ' '.join(map(str, text_for_report)),
                 transform=fig.transFigure,
                 size=10,
                 ha="left")
        plt.title('Summary of Analysis')
        pdf.savefig()
        plt.close()

        # Print Model Predictions
        fig, ax = plt.subplots()
        # hide axes
        fig.patch.set_visible(False)

        ax.axis('off')
        ax.axis('tight')
        ax.table(cellText=model_preds_basic_stats.values,
                 colLabels=model_preds_basic_stats.columns,
                 loc='center')

        plt.title('Model Predictions')
        fig.tight_layout()
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()

        # Print residuals for different models
        fig, ax = plt.subplots()
        # hide axes
        fig.patch.set_visible(False)

        ax.axis('off')
        ax.axis('tight')
        ax.table(cellText=model_preds_residuals.values,
                 colLabels=model_preds_residuals.columns,
                 loc='center')
        plt.title('Scaled Residuals')
        fig.tight_layout()
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()

        # Extract data for best model found and save it for portal
        # and plot fit for selected model
        model_name = selected_models['model'].values
        optimized_params = model_preds.loc[model_preds['Model'] ==
                                           model_name[0],
                                           'Optimized Params'].values[0]

        CI_bounds = np.zeros([2, len(test_dose_response.dose)])

        for index in range(len(test_dose_response.dose)):
            CI = astrostats.binom_conf_interval(
                test_dose_response.num_affected[index],
                test_dose_response.total_num[index],
                confidence_level=0.95)
            CI = np.abs(CI - test_dose_response.num_affected[index] /
                        test_dose_response.total_num[index])
            CI_bounds[0, index] = CI[0]
            CI_bounds[1, index] = CI[1]

        fig, ax = plt.subplots()

        # Setting the values for all axes.
        custom_ylim = (0, 1)
        plt.setp(ax, ylim=custom_ylim)

        ax.set_xscale("linear")
        ax.errorbar(test_dose_response.dose,
                    test_dose_response.num_affected /
                    test_dose_response.total_num,
                    CI_bounds,
                    marker='s',
                    mfc='red',
                    fmt='.')

        ax.set_xlabel('Dose')
        ax.set_ylabel('Fractional Response')
        ax.set_title(' '.join(
            map(str,
                'Dose-response with best fit model (' + model_name + ')')))

        int_steps = 10
        dose_x_vals = gen_uneven_spacing(test_dose_response.dose, int_steps)
        dose_x_vals = np.append(
            dose_x_vals,
            dose_x_vals[-1] + (dose_x_vals[-1] - dose_x_vals[-2]) / int_steps)

        if (model_name != 'None'):
            if (model_name == 'logistic'):
                ax.plot(dose_x_vals,
                        baf.logistic_fun(dose_x_vals, optimized_params), 'b-')
                y_vals = baf.logistic_fun(dose_x_vals, optimized_params)
            elif (model_name == 'log_logistic'):
                ax.plot(dose_x_vals,
                        baf.log_logistic_fun(dose_x_vals, optimized_params),
                        'b-')
                y_vals = baf.log_logistic_fun(dose_x_vals, optimized_params)
            elif (model_name == 'gamma'):
                ax.plot(dose_x_vals,
                        baf.gamma_fun(dose_x_vals, optimized_params), 'b-')
                y_vals = baf.gamma_fun(dose_x_vals, optimized_params)
            elif (model_name == 'weibull'):
                ax.plot(dose_x_vals,
                        baf.weibull_fun(dose_x_vals, optimized_params), 'b-')
                y_vals = baf.weibull_fun(dose_x_vals, optimized_params)
            elif (model_name == 'probit'):
                ax.plot(dose_x_vals,
                        baf.probit_fun(dose_x_vals, optimized_params), 'b-')
                y_vals = baf.probit_fun(dose_x_vals, optimized_params)
            elif (model_name == 'log_probit'):
                ax.plot(dose_x_vals,
                        baf.log_probit_fun(dose_x_vals, optimized_params),
                        'b-')
                y_vals = baf.log_probit_fun(dose_x_vals, optimized_params)
            elif (model_name == 'multistage_2'):
                ax.plot(dose_x_vals,
                        baf.multistage_2_fun(dose_x_vals, optimized_params),
                        'b-')
                y_vals = baf.multistage_2_fun(dose_x_vals, optimized_params)
            elif (model_name == 'quantal_linear'):
                ax.plot(dose_x_vals,
                        baf.quantal_linear_fun(dose_x_vals, optimized_params),
                        'b-')
                y_vals = baf.quantal_linear_fun(dose_x_vals, optimized_params)

        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()

        # Create dataframes to append to the csv files
        bmd_vals = pd.DataFrame(columns=[
            'Chemical_ID', 'End_Point', 'Model', 'BMD10', 'BMDL', 'BMD50',
            'AUC', 'Min_Dose', 'Max_Dose', 'AUC_Norm', 'DataQC_Flag',
            'BMD_Analysis_Flag', 'BMD10_Flag', 'BMD50_Flag'
        ])
        dose_response_vals = pd.DataFrame(columns=[
            'Chemical_ID', 'End_Point', 'Dose', 'Response', 'CI_Lo', 'CI_Hi'
        ])
        #fit_vals = pd.DataFrame(columns = ['Chemical_ID', 'End_Point', 'X_vals', 'Y_vals', 'Y_vals_diff'])
        fit_vals = pd.DataFrame(
            columns=['Chemical_ID', 'End_Point', 'X_vals', 'Y_vals'])

        # Populate dataframes
        bmd_vals['Chemical_ID'] = [chemical_id]
        bmd_vals['End_Point'] = [end_point]
        bmd_vals['Model'] = model_name
        bmd_vals['BMD10'] = model_preds.loc[model_preds['Model'] ==
                                            model_name[0], 'BMD10'].values
        bmd_vals['BMDL'] = model_preds.loc[model_preds['Model'] ==
                                           model_name[0], 'BMDL10'].values
        bmd_vals['BMD50'] = model_preds.loc[model_preds['Model'] ==
                                            model_name[0], 'BMD50'].values
        bmd_vals['DataQC_Flag'] = qc_flag
        bmd_vals['AUC'] = dose_response_auc
        bmd_vals['Min_Dose'] = dose_min
        bmd_vals['Max_Dose'] = dose_max
        bmd_vals['AUC_Norm'] = dose_response_auc_norm
        bmd_vals['BMD_Analysis_Flag'] = bmd_analysis_flag

        if (model_preds.loc[model_preds['Model'] == model_name[0],
                            'BMD10'].values < test_dose_response.dose[1]):
            bmd_vals['BMD10_Flag'] = -1
        elif (model_preds.loc[model_preds['Model'] == model_name[0],
                              'BMD10'].values >
              test_dose_response.dose.iloc[-1]):
            bmd_vals['BMD10_Flag'] = 1
        else:
            bmd_vals['BMD10_Flag'] = 0

        if (model_preds.loc[model_preds['Model'] == model_name[0],
                            'BMD50'].values < test_dose_response.dose[1]):
            bmd_vals['BMD50_Flag'] = -1
        elif (model_preds.loc[model_preds['Model'] == model_name[0],
                              'BMD50'].values >
              test_dose_response.dose.iloc[-1]):
            bmd_vals['BMD50_Flag'] = 1
        else:
            bmd_vals['BMD50_Flag'] = 0

        dose_response_vals['Chemical_ID'] = [chemical_id] * len(
            test_dose_response.dose)
        dose_response_vals['End_Point'] = [end_point] * len(
            test_dose_response.dose)
        dose_response_vals['Dose'] = test_dose_response.dose
        dose_response_vals[
            'Response'] = test_dose_response.num_affected / test_dose_response.total_num
        dose_response_vals['CI_Lo'] = CI_bounds[0, :]
        dose_response_vals['CI_Hi'] = CI_bounds[1, :]

        if (report):
            print(len(dose_x_vals))
            print(len(y_vals))

        fit_vals['Chemical_ID'] = [chemical_id] * len(dose_x_vals)
        fit_vals['End_Point'] = [end_point] * len(dose_x_vals)
        fit_vals['X_vals'] = dose_x_vals
        fit_vals['Y_vals'] = y_vals
        #fit_vals['Y_vals_diff'] = y_vals

        if not os.path.isfile(bmd_vals_file_name):
            bmd_vals.to_csv(bmd_vals_file_name,
                            header='column_names',
                            index=False,
                            na_rep='NULL')
        else:  # else it exists so append without writing the header
            bmd_vals.to_csv(bmd_vals_file_name,
                            mode='a',
                            header=False,
                            index=False,
                            na_rep='NULL')

        if not os.path.isfile(dose_response_vals_file_name):
            dose_response_vals.to_csv(dose_response_vals_file_name,
                                      header='column_names',
                                      index=False,
                                      na_rep='NULL')
        else:  # else it exists so append without writing the header
            dose_response_vals.to_csv(dose_response_vals_file_name,
                                      mode='a',
                                      header=False,
                                      index=False,
                                      na_rep='NULL')

        if not os.path.isfile(fit_vals_file_name):
            fit_vals.to_csv(fit_vals_file_name,
                            header='column_names',
                            index=False,
                            na_rep='NULL')
        else:  # else it exists so append without writing the header
            fit_vals.to_csv(fit_vals_file_name,
                            mode='a',
                            header=False,
                            index=False,
                            na_rep='NULL')

        # We can also set the file's metadata via the PdfPages object:
        d = pdf.infodict()
        d['Author'] = 'Paritosh Pande'
        d['CreationDate'] = datetime.datetime.today()
        edwarf = np.sum((ecodwarfconf.jhu != 0) | (ecodwarfconf.port != 0))
        edwarfagn = np.sum(ecodwarfconf['confidence_level'] >= 0)

    elif ('&' in colname[index]):
        rdwarf = np.sum((resdwarfconf.jhu != 0) & (resdwarfconf.port != 0))
        rdwarfagn = np.sum(resdwarfconf['confidence_level'] == 2)
        edwarf = np.sum((ecodwarfconf.jhu != 0) & (ecodwarfconf.port != 0))
        edwarfagn = np.sum(ecodwarfconf['confidence_level'] == 2)

    else:
        rdwarf = np.sum(resdwarfconf[colname[index]] != 0)
        rdwarfagn = np.sum(resdwarfconf[colname[index]] > 0)
        edwarf = np.sum(ecodwarfconf[colname[index]] != 0)
        edwarfagn = np.sum(ecodwarfconf[colname[index]] > 0)
    rdwarfagnpc = round((100.0 * rdwarfagn / rdwarf), 2)
    r_edown, r_eup = 100.0 * binom_conf_interval(rdwarfagn,
                                                 rdwarf) - rdwarfagnpc
    r_edown = round(-r_edown, 2)
    r_eup = round(r_eup, 2)

    edwarfagnpc = round((100.0 * edwarfagn / edwarf), 2)
    e_edown, e_eup = 100.0 * binom_conf_interval(edwarfagn,
                                                 edwarf) - edwarfagnpc
    e_edown = round(-e_edown, 2)
    e_eup = round(e_eup, 2)

    print('\t' + index + ' & ' + str(rdwarf) + ' & ' + str(rdwarfagn) + ' & $' +
          str(rdwarfagnpc) + '^{+' + str(r_eup) + '}' + '_{' + str(-r_edown) + '}$' + ' & ' +
          str(edwarf) + ' & ' + str(edwarfagn) + ' & $' + str(edwarfagnpc) +
          '^{+' + str(e_eup) + '}' + '_{' + str(-e_edown) + '}$\\\\')
print('\t \\hline \n \t \\end{tabular} \n \\label{table:2} \n \\end{table*}')
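The error columns in the table above come from converting the interval returned by binom_conf_interval into asymmetric offsets around the AGN percentage. A minimal sketch of that conversion, using astropy's default ~68% Wilson interval; the counts are invented:

from astropy.stats import binom_conf_interval

k, n = 12, 85                                  # hypothetical dwarf-AGN count and sample size
pc = 100.0 * k / n                             # point estimate in per cent
lo, hi = 100.0 * binom_conf_interval(k, n)     # default ~68% Wilson interval, scaled to per cent
err_down, err_up = pc - lo, hi - pc            # asymmetric errors quoted in the table
print('{:.2f}^{{+{:.2f}}}_{{-{:.2f}}}'.format(pc, err_up, err_down))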
Exemple #16
0
pcdwarfspringagn = 100.0*dwarfspringagn/totaldwarfagn
pcdwarffallagn  = 100.0*dwarffallagn/totaldwarfagn

print('\n\nDwarfs from RESOLVE Master Catalog')
print('Number of Dwarfs: {} \nSpring Dwarfs: {} ({:.2f}% of spring sample) \
      \nFall Dwarfs: {} ({:.2f}% of fall sample)'
      .format(totaldwarf,len(dwarfspring),pcdwarfspring,
              len(dwarffall),pcdwarffall))
#print('Total: {} \nSpring Dwarf AGN: {} ({:.2f}%) \nFall Dwarf AGN: {} ({:.2f}%)'
#      .format(totaldwarfagn,dwarfspringagn,pcdwarfspringagn,
#              dwarffallagn,pcdwarffallagn))
pcspringdwarfagn = 100.0*dwarfspringagn/len(dwarfspring)
pcfalldwarfagn = 100.0*dwarffallagn/len(dwarffall)
pcresdwarfagn = 100.0*np.sum(dwarfagn)/totaldwarf

springlowlim, springuplim = 100*binom_conf_interval(dwarfspringagn, len(dwarfspring))
springup = springuplim - pcspringdwarfagn
springlow = springlowlim - pcspringdwarfagn
falllowlim, falluplim = 100*binom_conf_interval(dwarffallagn, len(dwarffall))
fallup = falluplim - pcfalldwarfagn
falllow = falllowlim - pcfalldwarfagn
reslowlim, resuplim = 100*binom_conf_interval(np.sum(dwarfagn), totaldwarf)
resup = resuplim - pcresdwarfagn
reslow = reslowlim - pcresdwarfagn

def pcprint(pc, up, low):
    pc = str(round(pc, 2)) + '^{+' + str(round(up, 2)) + '}_{' + str(round(low, 2)) + '}\\%'
    display(Math(pc))

display(Math('Dwarf AGN'))
display(Math('Spring : '+str(dwarfspringagn)+'/'+str(len(dwarfspring))))
Exemple #17
0
def main(input_files, labels, output, n_bins, threshold, reference):

    bins, bin_center, bin_widths = make_energy_bins(e_min=0.008 * u.TeV, e_max=200 * u.TeV, bins=n_bins)

    for input_file, label, color in zip_longest(input_files, labels, color_cycle):
        
        if not input_file:
            break
        
        events = read_data(input_file, key='array_events')
        runs = read_data(input_file, key='runs')
        mc_production = MCSpectrum.from_cta_runs(runs)

        if threshold > 0:
            events = events.loc[events.gamma_prediction_mean >= threshold]

        energies = events.gamma_energy_prediction_mean.values

        hist_all = mc_production.expected_events_for_bins(energy_bins=bins)
        hist_selected, _ = np.histogram(energies, bins=bins)

        invalid = hist_selected > hist_all
        hist_selected[invalid] = hist_all[invalid]
        
        # use astropy to compute errors on that stuff
        lower_conf, upper_conf = binom_conf_interval(hist_selected, hist_all)

        # no area scaling needed here: the ratio below is already a trigger probability

        trigger_probability = (hist_selected / hist_all)

        # matplotlib wants relative offsets for errors. the conf values are absolute.
        lower = trigger_probability - lower_conf
        upper = upper_conf - trigger_probability

        mask = trigger_probability > 0
        plt.errorbar(
            bin_center.value[mask],
            trigger_probability[mask],
            xerr=bin_widths.value[mask] / 2.0,
            yerr=[lower[mask], upper[mask]],
            linestyle='',
            color=color,
            label=label,
        )


    if reference:
        df = load_effective_area_requirement()
        plt.plot(df.energy, df.effective_area, '--', color='gray', label='Prod3b reference')

    plt.legend()


    # plt.ylim([100, 1E8])
    plt.xscale('log')
    # plt.yscale('log')
    plt.xlabel(r'$E_{\mathrm{Reco}} /  \mathrm{TeV}$')
    plt.ylabel('Trigger Probability')
    plt.tight_layout()
    if output:
        plt.savefig(output)
    else:
        plt.show()
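As the in-code comments note, binom_conf_interval returns absolute bounds while plt.errorbar expects offsets relative to the plotted value. A minimal sketch of that conversion for a few bins, with invented counts:

import numpy as np
from astropy.stats import binom_conf_interval

hist_selected = np.array([40, 300, 900])    # hypothetical triggered counts per energy bin
hist_all = np.array([1000, 1000, 1000])     # hypothetical simulated counts per energy bin

trigger_probability = hist_selected / hist_all
lower_conf, upper_conf = binom_conf_interval(hist_selected, hist_all)  # absolute ~68% bounds
# convert absolute bounds into the relative offsets matplotlib wants
yerr = [trigger_probability - lower_conf, upper_conf - trigger_probability]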
Exemple #18
0
def save_results_poor_data_or_no_convergence(test_dose_response,
                                             qc_flag,
                                             chemical_id,
                                             end_point,
                                             selected_models=None):
    # Create the PdfPages object to which we will save the pages:
    # The with statement makes sure that the PdfPages object is closed properly at
    # the end of the block, even if an Exception occurs.
    #print(test_dose_response)
    #print(qc_flag)
    #print(chemical_id)
    #print(end_point)
    #print(selected_models)

    # Estimate AUC and the min and max doses
    if (not test_dose_response.empty):
        dose_response_auc = np.trapz(test_dose_response.num_affected /
                                     test_dose_response.total_num,
                                     x=test_dose_response.dose)
        dose_min = min(test_dose_response.dose)
        dose_max = max(test_dose_response.dose)
        dose_response_auc_norm = dose_response_auc / (dose_max - dose_min)
    else:
        dose_response_auc = np.nan
        dose_min = np.nan
        dose_max = np.nan
        dose_response_auc_norm = np.nan

    if (not isinstance(chemical_id, str)):
        chemical_id = str(chemical_id)
    filename = chemical_id + '_' + end_point + '.pdf'

    # Create dictionaries for various flags
    data_qc_flag_vals = {
        0: 'Not enough dose groups for BMD analysis.\n BMD analysis not performed.',
        1: 'No trend detected in dose-response data.\nBMD analysis not performed.',
        2: 'Dose-response data quality very good.',
        3: 'Dose-response data quality good.',
        4: 'Data resolution poor.\nCaution advised.',
        5: 'Negative correlation detected in dose-response data.\nCaution advised.'
    }

    # Filenames for csv files containing the results of analysis
    bmd_vals_file_name = 'bmd_vals_' + str(time_now_date) + '.csv'
    dose_response_vals_file_name = 'dose_response_vals_' + str(
        time_now_date) + '.csv'
    fit_vals_file_name = 'fit_vals_' + str(time_now_date) + '.csv'

    # Generate text for report
    if (selected_models is not None):
        text_for_report = 'Convergence not achieved for any dose-response model.'
    else:
        text_for_report = data_qc_flag_vals[qc_flag]

    with PdfPages(filename) as pdf:
        # Output data summary
        fig, ax = plt.subplots()
        # hide axes
        fig.patch.set_visible(False)

        ax.axis('off')
        ax.axis('tight')
        #fig.text(0.1,0.7,' '.join(map(str, text_for_report)), transform=fig.transFigure, size=10, ha="left")
        fig.text(0.1,
                 0.7,
                 text_for_report,
                 transform=fig.transFigure,
                 size=10,
                 ha="left")
        plt.title('Summary of Analysis')
        pdf.savefig()
        plt.close()

        # Plot dose-response data
        CI_bounds = np.zeros([2, len(test_dose_response.dose)])
        # in save_results_poor_data_or_no_convergence fn
        for index in range(len(test_dose_response.dose)):
            print(
                f"test_dose_response.num_affected[index]:{test_dose_response.num_affected[index]}"
            )
            print(
                f"test_dose_response.total_num[index]:{test_dose_response.total_num[index]}"
            )
            CI = astrostats.binom_conf_interval(
                test_dose_response.num_affected[index],
                test_dose_response.total_num[index],
                confidence_level=0.95)
            CI = np.abs(CI - test_dose_response.num_affected[index] /
                        test_dose_response.total_num[index])
            CI_bounds[0, index] = CI[0]
            CI_bounds[1, index] = CI[1]
        fig, ax = plt.subplots()

        # Setting the values for all axes.
        custom_ylim = (0, 1)
        plt.setp(ax, ylim=custom_ylim)

        ax.set_xscale("linear")
        ax.errorbar(test_dose_response.dose,
                    test_dose_response.num_affected /
                    test_dose_response.total_num,
                    CI_bounds,
                    marker='s',
                    mfc='red',
                    fmt='.')

        ax.set_xlabel('Dose')
        ax.set_ylabel('Fractional Response')
        ax.set_title('Dose-response Data')
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()

        # Create dataframes to append to the csv files
        bmd_vals = pd.DataFrame(columns=[
            'Chemical_ID', 'End_Point', 'Model', 'BMD10', 'BMDL', 'BMD50',
            'AUC', 'Min_Dose', 'Max_Dose', 'AUC_Norm', 'DataQC_Flag',
            'BMD_Analysis_Flag', 'BMD10_Flag', 'BMD50_Flag'
        ])
        dose_response_vals = pd.DataFrame(columns=[
            'Chemical_ID', 'End_Point', 'Dose', 'Response', 'CI_Lo', 'CI_Hi'
        ])
        fit_vals = pd.DataFrame(
            columns=['Chemical_ID', 'End_Point', 'X_vals', 'Y_vals'])

        # Populate dataframes
        bmd_vals['Chemical_ID'] = [chemical_id]
        bmd_vals['End_Point'] = [end_point]
        bmd_vals['Model'] = np.nan
        bmd_vals['BMD10'] = np.nan
        bmd_vals['BMDL'] = np.nan
        bmd_vals['BMD50'] = np.nan
        bmd_vals['DataQC_Flag'] = qc_flag
        bmd_vals['AUC'] = dose_response_auc
        bmd_vals['Min_Dose'] = dose_min
        bmd_vals['Max_Dose'] = dose_max
        bmd_vals['AUC_Norm'] = dose_response_auc_norm
        bmd_vals['BMD_Analysis_Flag'] = np.nan
        bmd_vals['BMD10_Flag'] = np.nan
        bmd_vals['BMD50_Flag'] = np.nan

        assign_nan = False
        try:  # 53_ANY24
            bogus = test_dose_response.dose[0]
            #print ("test_dose_response.dose[0]:"+str(test_dose_response.dose[0]))
        except:  # 1532_ANY24
            assign_nan = True
            # print ("test_dose_response.dose:"+str(test_dose_response.dose))
            # Series([], Name: dose, dtype: object)

        if (assign_nan):
            dose_response_vals['Chemical_ID'] = [chemical_id]
            dose_response_vals['End_Point'] = [end_point]
            dose_response_vals['Dose'] = np.nan
            dose_response_vals['Response'] = np.nan
            dose_response_vals['CI_Lo'] = np.nan
            dose_response_vals['CI_Hi'] = np.nan
        else:
            dose_response_vals['Chemical_ID'] = [chemical_id] * len(
                test_dose_response.dose)
            dose_response_vals['End_Point'] = [end_point] * len(
                test_dose_response.dose)
            dose_response_vals['Dose'] = test_dose_response.dose
            dose_response_vals[
                'Response'] = test_dose_response.num_affected / test_dose_response.total_num
            dose_response_vals['CI_Lo'] = CI_bounds[0, :]
            dose_response_vals['CI_Hi'] = CI_bounds[1, :]

        fit_vals['Chemical_ID'] = [chemical_id]
        fit_vals['End_Point'] = [end_point]
        fit_vals['X_vals'] = np.nan
        fit_vals['Y_vals'] = np.nan

        if not os.path.isfile(bmd_vals_file_name):
            bmd_vals.to_csv(bmd_vals_file_name,
                            header=True,
                            index=False,
                            na_rep='NULL')
        else:  # else it exists so append without writing the header
            bmd_vals.to_csv(bmd_vals_file_name,
                            mode='a',
                            header=False,
                            index=False,
                            na_rep='NULL')

        if not os.path.isfile(dose_response_vals_file_name):
            dose_response_vals.to_csv(dose_response_vals_file_name,
                                      header=True,
                                      index=False,
                                      na_rep='NULL')
        else:  # else it exists so append without writing the header
            dose_response_vals.to_csv(dose_response_vals_file_name,
                                      mode='a',
                                      header=False,
                                      index=False,
                                      na_rep='NULL')

        if not os.path.isfile(fit_vals_file_name):
            fit_vals.to_csv(fit_vals_file_name,
                            header=True,
                            index=False,
                            na_rep='NULL')
        else:  # else it exists so append without writing the header
            fit_vals.to_csv(fit_vals_file_name,
                            mode='a',
                            header=False,
                            index=False,
                            na_rep='NULL')

        # We can also set the file's metadata via the PdfPages object:
        d = pdf.infodict()
        d['Author'] = 'Paritosh Pande'
        d['CreationDate'] = datetime.datetime.today()
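The per-dose loop above computes a 95% interval on each group's fractional response and stores its half-widths as CI_Lo / CI_Hi. A compact vectorised sketch of the same calculation, assuming an astropy version with the confidence_level keyword; the counts are placeholders:

import numpy as np
from astropy.stats import binom_conf_interval

num_affected = np.array([0, 2, 5, 9])        # hypothetical affected counts per dose group
total_num = np.array([10, 10, 10, 10])       # hypothetical group sizes

response = num_affected / total_num
ci = binom_conf_interval(num_affected, total_num, confidence_level=0.95)  # shape (2, n_doses)
CI_bounds = np.abs(ci - response)            # half-widths used as CI_Lo / CI_Hi error bars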
Exemple #19
0
def main(input_file, output, cuts_path, reference, cmap):

    bins, bin_center, bin_widths = make_default_cta_binning(e_min=0.005 *
                                                            u.TeV,
                                                            bins_per_decade=15)

    gammas, _, _ = load_signal_events(
        input_file,
        calculate_weights=False,
    )
    gammas.dropna(inplace=True)

    sigma = 1
    gammas = apply_cuts(gammas,
                        cuts_path=cuts_path,
                        theta_cuts=True,
                        sigma=sigma)

    runs = load_runs(input_file)
    mc_production = MCSpectrum.from_cta_runs(runs)

    data_description = load_data_description(input_file,
                                             gammas,
                                             cuts_path=cuts_path)

    gammas_energy = gammas.mc_energy.values

    hist_all = mc_production.expected_events_for_bins(energy_bins=bins)
    hist_selected, _ = np.histogram(gammas_energy, bins=bins)

    invalid = hist_selected > hist_all
    hist_selected[invalid] = hist_all[invalid]
    # use astropy to compute errors on that stuff
    lower_conf, upper_conf = binom_conf_interval(hist_selected,
                                                 hist_all,
                                                 conf=0.95)

    # scale confidences to match and split
    lower_conf = lower_conf * mc_production.generation_area
    upper_conf = upper_conf * mc_production.generation_area

    area = (hist_selected / hist_all) * mc_production.generation_area

    # matplotlib wants relative offsets for errors. the conf values are absolute.
    lower = area - lower_conf
    upper = upper_conf - area

    mask = area > 0
    color = None
    if cuts_path:
        f_prediction = prediction_function(cuts_path, sigma=0)
        colormap = cm.get_cmap(cmap, 512)
        color = colormap(f_prediction(bin_center.value[mask]))

        sm = cm.ScalarMappable(cmap=colormap)
        plt.colorbar(sm, label='Prediction Threshold', pad=0.01)

    plt.errorbar(
        bin_center.value[mask],
        area.value[mask],
        xerr=bin_widths.value[mask] / 2.0,
        yerr=[lower.value[mask], upper.value[mask]],
        linestyle='',
        color=color if color is not None else next(color_cycle),
        # label='Effective Area'
    )

    if reference:
        df = load_effective_area_reference()
        plt.plot(df.energy,
                 df.effective_area,
                 '--',
                 color='gray',
                 label='Reference')

    legend = plt.legend(framealpha=0, loc='upper left', handletextpad=1)
    # renderer = plt.gcf().canvas.get_renderer()
    # shift = max([t.get_window_extent(renderer).width for t in legend.get_texts()])
    for t in legend.get_texts():
        # print(t, shift)
        t.set_multialignment('right')
    #     t.set_ha('left') # ha is alias for horizontalalignment
    # t.set_position((shift,0))

    legend.set_title(data_description)
    legend._legend_box.align = "left"
    legend.get_title().set_alpha(0.5)

    plt.ylim([800, 0.5E8])
    plt.xscale('log')
    plt.yscale('log')
    plt.xlabel('True Energy / TeV')
    plt.ylabel('Effective Area / $\\text{m}^2$')
    plt.tight_layout(pad=0, rect=(0.001, 0, 1.041, 0.99))

    if output:
        plt.savefig(output)
    else:
        plt.show()
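The effective area above is the binomial selection efficiency scaled by the simulated generation area, with the confidence bounds scaled the same way. A minimal numerical sketch of that scaling; the counts and area are invented:

import numpy as np
from astropy.stats import binom_conf_interval

hist_selected = np.array([15, 180, 600])     # hypothetical selected gamma counts per energy bin
hist_all = np.array([1000, 2000, 4000])      # hypothetical simulated gamma counts per energy bin
generation_area = 1.0e6                      # hypothetical generation area in m^2

area = hist_selected / hist_all * generation_area
lower_conf, upper_conf = binom_conf_interval(hist_selected, hist_all) * generation_area
yerr = [area - lower_conf, upper_conf - area]   # asymmetric errors for the error bars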
Exemple #20
0
    def process(self,
                ih,
                user_var,
                fix_thresh=False,
                method='quick',
                include=True):
        """Calculate the statistics from the current histogram.
        Keyword arguments:
        ih: an instance of the image_handler Analysis class, generates the histogram
        user_var: the user variable associated with this calculation
        fix_thresh: True - keep old threshold value, False - update the threshold value
        method: 'quick' - image_handler uses a peak finding algorithm 
                'double gaussian' - fit a double Gaussian function
                'separate gaussians' - split the histogram at the threshold and fit Gaussians
                'double poissonian' - fit a double Poissonian function
                'single gaussian' - fit a single Gaussian to background peak
        include: whether to include the values in further analysis.
        """
        if ih.ind > 0:  # only update if a histogram exists
            if fix_thresh:  # using manual threshold
                bins, occ, thresh = ih.histogram(
                )  # update hist and peak stats, keep thresh
            else:
                bins, occ, thresh = ih.hist_and_thresh(
                )  # update hist and get peak stats
            bin_mid = (bins[1] - bins[0]) * 0.5  # from edge of bin to middle
            self.bf = fc.fit(bins[:-1] + bin_mid,
                             occ)  # class for fitting function to data

            try:
                int(np.log(thresh))  # don't do anything if threshold is < 1
                ih.est_peaks(bins, occ)  # use find_peaks to get first estimate
            except (ValueError, OverflowError):
                return 0

            if method == 'quick':
                A0, A1 = ih.peak_heights
                mu0, mu1 = ih.peak_centre
                sig0, sig1 = ih.peak_widths
            elif method == 'double gaussian':
                # parameters: Total num images, loading prob, centre, s.d., centre, s.d.
                self.bf.p0 = [
                    ih.ind, 0.6, ih.peak_centre[0], ih.peak_widths[0],
                    ih.peak_centre[1], ih.peak_widths[1]
                ]
                try:
                    if fix_thresh:  # bound the lower peak to below threshold
                        self.bf.getBestFit(
                            self.bf.double_gauss,
                            bounds=(np.array([0, 0, 0, 0, ih.thresh, 0]),
                                    np.array([
                                        np.inf, 1, ih.thresh, np.inf, np.inf,
                                        np.inf
                                    ])))
                    else:  # get unbounded best fit parameters
                        self.bf.getBestFit(self.bf.double_gauss)
                except:
                    return 0  # fit failed, do nothing
                if self.bf.ps[2] < self.bf.ps[4]:  # background peak should have the lower centre
                    N, A1, mu0, sig0, mu1, sig1 = self.bf.ps
                else:
                    N, A1, mu1, sig1, mu0, sig0 = self.bf.ps
                A0, A1 = N * (1 - A1), N * A1
            elif method == 'separate gaussians':  # separate Gaussian fit for bg/signal
                diff = abs(bins - thresh)  # minimum is at the threshold
                thresh_i = np.argmin(diff)  # index of the threshold
                # split the histogram at the threshold value
                best_fits = [
                    fc.fit(bins[:thresh_i] + bin_mid, occ[:thresh_i]),
                    fc.fit(bins[thresh_i:-1] + bin_mid, occ[thresh_i:])
                ]
                for b in best_fits:
                    try:
                        b.estGaussParam()  # get estimate of parameters
                        b.getBestFit(b.gauss)  # get best fit parameters
                    except:
                        return 0
                A0, mu0, sig0 = best_fits[0].ps
                A1, mu1, sig1 = best_fits[1].ps
                self.bf.p0 = [
                    A0 + A1, 1 - A0 / (A0 + A1), mu0, sig0, mu1, sig1
                ]
                self.bf.ps = [
                    A0 + A1, 1 - A0 / (A0 + A1), mu0, sig0, mu1, sig1
                ]
                self.bf.bffunc = self.bf.double_gauss  # plot as double gaussian for consistency

            elif method == 'double poissonian':
                self.bf.p0 = [
                    ih.peak_heights[0], ih.peak_centre[0], ih.peak_heights[1],
                    ih.peak_centre[1]
                ]
                try:
                    # parameters are: mean, amplitude
                    self.bf.getBestFit(self.bf.double_poisson)
                except:
                    return 0
                A0, mu0, A1, mu1 = self.bf.ps
                sig0, sig1 = np.sqrt(mu0), np.sqrt(mu1)

            elif method == 'single gaussian':
                try:
                    self.bf.estGaussParam()
                    self.bf.getBestFit(
                        self.bf.gauss)  # get best fit parameters
                except:
                    return 0  # fit failed, do nothing
                A0, mu0, sig0 = self.bf.ps
                A1, mu1, sig1 = 0, 0, 0
                fix_thresh = True
                ih.thresh = max(bins)  # set the threshold above the counts
            try:
                list(map(
                    int,
                    [A0, A1, mu0, mu1, sig0, sig1]))  # check for NaN or inf
            except (ValueError, OverflowError):
                return 0
            ih.peak_heights = [A0, A1]
            ih.peak_centre = [mu0, mu1]
            ih.peak_widths = [sig0, sig1]

            if self.bf.rchisq and abs(self.bf.rchisq) > 1e9:
                include = False  # bad fit

            # update threshold to where fidelity is maximum if not set by user
            if fix_thresh:
                ih.fidelity, ih.err_fidelity = np.around(ih.get_fidelity(),
                                                         4)  # round to 4 d.p.
            else:
                ih.hist_and_thresh()

            # update atom statistics
            ih.stats['Atom detected'] = [
                count // ih.thresh for count in ih.stats['Counts']
            ]

            above_idxs = np.where(np.array(ih.stats['Atom detected']) > 0)[
                0]  # index of images with counts above threshold
            atom_count = np.size(
                above_idxs)  # number of images with counts above threshold
            above = np.array(
                ih.stats['Counts'])[above_idxs]  # counts above threshold
            below_idxs = np.where(np.array(ih.stats['Atom detected']) <= 0)[
                0]  # index of images with counts below threshold
            empty_count = np.size(
                below_idxs)  # number of images with counts below threshold
            below = np.array(
                ih.stats['Counts'])[below_idxs]  # counts below threshold
            # use the binomial distribution to get 1 sigma confidence intervals:
            conf = binom_conf_interval(atom_count,
                                       atom_count + empty_count,
                                       interval='jeffreys')
            loading_prob = atom_count / ih.ind  # fraction of images above threshold
            uplperr = conf[1] - loading_prob  # 1 sigma confidence above mean
            lolperr = loading_prob - conf[0]  # 1 sigma confidence below mean

            # store the calculated histogram statistics as temp
            self.temp_vals['File ID'] = int(self.ind)
            self.temp_vals['Start file #'] = min(ih.stats['File ID'])
            self.temp_vals['End file #'] = max(ih.stats['File ID'])
            self.temp_vals['ROI xc ; yc ; size'] = ' ; '.join(
                list(map(str, [ih.xc, ih.yc, ih.roi_size])))
            self.temp_vals['User variable'] = self.types['User variable'](
                user_var) if user_var else 0.0
            self.temp_vals['Number of images processed'] = ih.ind
            self.temp_vals['Counts above : below threshold'] = str(
                atom_count) + ' : ' + str(empty_count)
            self.temp_vals['Loading probability'] = np.around(loading_prob, 4)
            self.temp_vals['Error in Loading probability'] = np.around(
                (uplperr + lolperr) * 0.5, 4)
            self.temp_vals['Lower Error in Loading probability'] = np.around(
                lolperr, 4)
            self.temp_vals['Upper Error in Loading probability'] = np.around(
                uplperr, 4)
            try:
                1 // empty_count  # raises ZeroDivisionError if size is 0
                1 // (empty_count - 1)  # for std dev need size > 1
                self.temp_vals['Background peak count'] = int(mu0)
                # assume bias offset is self.bias, readout noise Nr
                var = ih.roi_size * self.Nr**2 + self.dg * self.emg * mu0 / self.pag
                if var > 0:
                    self.temp_vals['sqrt(Nr^2 + Nbg*fg/A)'] = int(var**0.5)
                else:  # don't take the sqrt of a -ve number
                    self.temp_vals['sqrt(Nr^2 + Nbg*fg/A)'] = 0
                self.temp_vals['Background peak width'] = int(sig0)
                self.temp_vals['Error in Background peak count'] = np.around(
                    sig0 / empty_count**0.5, 2)
                self.temp_vals['Background mean'] = np.around(
                    np.mean(below), 1)
                self.temp_vals['Background standard deviation'] = np.around(
                    np.std(below, ddof=1), 1)
            except ZeroDivisionError:
                for key in [
                        'Background peak count', 'sqrt(Nr^2 + Nbg*fg/A)',
                        'Background peak width',
                        'Error in Background peak count'
                ]:
                    self.temp_vals[key] = 0
            try:
                1 // atom_count  # raises ZeroDivisionError if size is 0
                1 // (atom_count - 1)  # for std dev need size > 1
                self.temp_vals['Signal peak count'] = int(mu1)
                # assume bias offset is self.bias, readout noise Nr
                var = ih.roi_size * self.Nr**2 + self.dg * self.emg * mu1 / self.pag
                if var > 0:
                    self.temp_vals['sqrt(Nr^2 + Ns*fg/A)'] = int(var**0.5)
                else:  # don't take the sqrt of a -ve number
                    self.temp_vals['sqrt(Nr^2 + Ns*fg/A)'] = 0
                self.temp_vals['Signal peak width'] = int(sig1)
                self.temp_vals['Error in Signal peak count'] = np.around(
                    sig1 / atom_count**0.5, 2)
                self.temp_vals['Signal mean'] = np.around(np.mean(above), 1)
                self.temp_vals['Signal standard deviation'] = np.around(
                    np.std(above, ddof=1), 1)
                sep = mu1 - mu0  # separation of fitted peaks
                self.temp_vals['Separation'] = int(sep)
                seperr = np.sqrt(sig0**2 / empty_count + sig1**2 / atom_count)
                self.temp_vals['Error in Separation'] = np.around(seperr, 2)
                self.temp_vals['Fidelity'] = ih.fidelity
                self.temp_vals['Error in Fidelity'] = ih.err_fidelity
                self.temp_vals['S/N'] = np.around(
                    sep / np.sqrt(sig0**2 + sig1**2), 2)
                # fractional error in the error is 1/sqrt(2N - 2)
                self.temp_vals['Error in S/N'] = np.around(
                    self.temp_vals['S/N'] *
                    np.sqrt((seperr / sep)**2 +
                            (sig0**2 / (2 * empty_count - 2) + sig1**2 /
                             (2 * atom_count - 2)) / (sig0**2 + sig1**2)), 2)
                self.temp_vals['Include'] = include
            except ZeroDivisionError:
                for key in [
                        'Signal peak count', 'sqrt(Nr^2 + Ns*fg/A)',
                        'Signal peak width', 'Error in Signal peak count',
                        'Separation', 'Error in Separation', 'Fidelity',
                        'Error in Fidelity', 'S/N', 'Error in S/N', 'Include'
                ]:
                    self.temp_vals[key] = 0
            self.temp_vals['Threshold'] = int(ih.thresh)
        return 1  # fit successful
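The loading-probability block above attaches asymmetric 1-sigma errors from a Jeffreys interval to the fraction of images above threshold. A minimal sketch of that calculation with invented counts:

from astropy.stats import binom_conf_interval

atom_count, empty_count = 73, 27       # hypothetical images above / below threshold
n_images = atom_count + empty_count

loading_prob = atom_count / n_images
conf = binom_conf_interval(atom_count, n_images, interval='jeffreys')  # ~68% Jeffreys interval
upper_err = conf[1] - loading_prob     # 1 sigma above the estimate
lower_err = loading_prob - conf[0]     # 1 sigma below the estimate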
Exemple #21
0
def psychFit(deltaBins, numR, numL, choices):
    """
    Get psychometric curve fit from # of cues to Right & Left side and choice made by subject
    (Evidence vs % Choice Left)
    """
    numRight = np.zeros(len(deltaBins))
    numTrials = np.zeros(len(deltaBins))
    trialDelta = np.zeros(len(deltaBins))
    phat = np.zeros(len(deltaBins))
    pci = np.zeros((2, len(deltaBins)))

    # Evidence variable
    nCues_RminusL = numR - numL
    # Correct deltaBin & trialBin to produce same result as Matlab psychFit
    deltaBins_search = deltaBins.astype(float) - 1.5
    trialBin = np.searchsorted(deltaBins_search, nCues_RminusL, side='right')
    trialBin -= 1

    # Put into evidence bins all Trials with corresponding choices
    for iTrial in range(len(choices)):
        numTrials[trialBin[iTrial]] = numTrials[trialBin[iTrial]] + 1
        if choices[iTrial] == 2:
            numRight[trialBin[iTrial]] = numRight[trialBin[iTrial]] + 1

        trialDelta[trialBin[iTrial]] = trialDelta[
            trialBin[iTrial]] + nCues_RminusL[iTrial]

    with np.errstate(divide='ignore', invalid='ignore'):
        trialDelta = np.true_divide(trialDelta, numTrials)

    # Select only bins with trials
    idx_zero = numTrials == 0
    numTrials_nz = numTrials[~idx_zero]
    numRight_nz = numRight[~idx_zero]

    # Binomial proportion confidence interval given k successes out of n trials.
    # confidence_level=0 collapses the Jeffreys interval onto the Beta-posterior median,
    # which serves as the per-bin point estimate below.
    phat_nz = binom_conf_interval(numRight_nz,
                                  numTrials_nz,
                                  confidence_level=0,
                                  interval='jeffreys')
    pci_nz = binom_conf_interval(numRight_nz,
                                 numTrials_nz,
                                 confidence_level=1 - 0.1587,
                                 interval='jeffreys')

    # Correct confidence intervals and expected outcomes for bins with no trials (ci = [0 1], hat = 0.5)
    phat_nz = phat_nz[0]
    phat[~idx_zero] = phat_nz
    phat[idx_zero] = 0.5
    pci[0][~idx_zero] = pci_nz[0]
    pci[0][idx_zero] = 0
    pci[1][~idx_zero] = pci_nz[1]
    pci[1][idx_zero] = 1

    # (Logistic function fit) only valid if we have at least 5 bins with trials
    if np.count_nonzero(~idx_zero) < 5:
        is_there_psychometric = False
    else:
        is_there_psychometric = True
        # Get weight matrix to "reproduce" Matlab fit
        # https://stackoverflow.com/questions/58983113/scipy-curve-fit-vs-matlab-fit-weighted-nonlinear-least-squares
        # matlab -> 'Weights'         , ((pci(sel,2) - pci(sel,1))/2).^-2
        # python -> sigma = diagonal_matrix(1/weights)

        weight_array = np.power((pci[1][~idx_zero] - pci[0][~idx_zero]) / 2, 2)
        sigma_fit = np.diag(weight_array)

        psychometric, pcov = curve_fit(psychometrics_function, deltaBins[~idx_zero], phat[~idx_zero], \
                                       p0=(0, 1, 3, 0), sigma=sigma_fit, maxfev=40000)

    # Append a row of NaNs to the confidence intervals (reason unclear)
    aux_vec = np.empty((1, pci.shape[1]))
    aux_vec[:] = np.nan
    pci = np.vstack((pci, aux_vec))

    # x vector for plotting
    delta = np.linspace(deltaBins[0] - 2, deltaBins[-1] + 2, num=50)

    # Repeat trialDelta 3 times so errorX matches the flattened 3-row errorY below
    errorX = np.tile(trialDelta[~idx_zero], 3)

    # Confidence intervals are errorY, as a vector
    errorY = np.stack(pci[:, ~idx_zero])
    errorY = errorY.flatten()

    # Fill  dictionary of results
    fit_results = dict()
    fit_results['delta_bins'] = deltaBins[~idx_zero]
    fit_results['delta_data'] = trialDelta[~idx_zero]
    fit_results['pright_data'] = 100 * phat[~idx_zero]
    fit_results['delta_error'] = errorX
    fit_results['pright_error'] = 100 * errorY

    if is_there_psychometric:
        fit_results['delta_fit'] = delta
        fit_results['pright_fit'] = psychometrics_function(
            delta, *psychometric) * 100
    else:
        fit_results['delta_fit'] = np.empty([0])
        fit_results['pright_fit'] = np.empty([0])

    return fit_results
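A closing sketch of the two-call pattern psychFit uses per evidence bin: a zero-width Jeffreys interval as the point estimate and a wider one as the error band. It assumes an astropy version with the confidence_level keyword; the counts are invented:

from astropy.stats import binom_conf_interval

n_right, n_trials = 14, 20                   # hypothetical rightward choices in one evidence bin

# confidence_level=0 gives a zero-width Jeffreys interval, i.e. the Beta-posterior median,
# which stands in for the per-bin point estimate.
p_hat = binom_conf_interval(n_right, n_trials,
                            confidence_level=0, interval='jeffreys')[0]
# The wider call at 1 - 0.1587 supplies the band plotted around that estimate.
p_lo, p_hi = binom_conf_interval(n_right, n_trials,
                                 confidence_level=1 - 0.1587, interval='jeffreys')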