Example #1
def _fig_density(sweight, surweight, pval, nlm):
    """
    Plot the histogram of sweight across the image
    and the thresholds implied by the surrogate model (surweight)
    """
    import matplotlib.pylab as mp
    # compute some thresholds
    nlm = nlm.astype('d')
    srweight = np.sum(surweight,1)
    srw = np.sort(srweight)
    nitem = np.size(srweight)
    thf = srw[int((1-min(pval,1))*nitem)]
    mnlm = max(1,nlm.mean())
    imin = min(nitem-1,int((1.-pval/mnlm)*nitem))
    
    thcf = srw[imin]
    h, c = np.histogram(sweight, 100)
    c = 0.5 * (c[:-1] + c[1:])  # np.histogram returns bin edges; use bin centres for plotting
    I = h.sum() * (c[1] - c[0])
    h = h / I
    h0, c0 = np.histogram(srweight, 100)
    c0 = 0.5 * (c0[:-1] + c0[1:])
    I0 = h0.sum() * (c0[1] - c0[0])
    h0 = h0 / I0
    mp.figure(1)
    mp.plot(c, h)
    mp.plot(c0, h0)
    mp.legend(('true histogram','surrogate histogram'))
    mp.plot([thf,thf],[0,0.8*h0.max()])
    mp.text(thf,0.8*h0.max(),'p<0.2, uncorrected')
    mp.plot([thcf,thcf],[0,0.5*h0.max()])
    mp.text(thcf,0.5*h0.max(),'p<0.05, corrected')
    mp.savefig('/tmp/histo_density.eps')
    mp.show()
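A minimal usage sketch of the thresholding logic above, with synthetic weights; the array shapes, pval and nlm values below are illustrative assumptions, not taken from the original project.

import numpy as np

# Hypothetical inputs: 10000 "voxels", 5 surrogate columns (illustrative only).
rng = np.random.default_rng(0)
sweight = rng.standard_normal(10000) ** 2
surweight = rng.standard_normal((10000, 5)) ** 2
pval = 0.2
nlm = np.array([3.0])  # stand-in for the number of local maxima

# Same threshold computation as _fig_density, without the plotting:
srweight = np.sum(surweight, 1)
srw = np.sort(srweight)
nitem = srw.size
thf = srw[int((1 - min(pval, 1)) * nitem)]                   # uncorrected threshold
mnlm = max(1, nlm.mean())
thcf = srw[min(nitem - 1, int((1. - pval / mnlm) * nitem))]  # corrected threshold
print(thf, thcf)
# _fig_density(sweight, surweight, pval, nlm)  # would also draw the two histograms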
Example #2
def edge_detect(img):
    BLUR_SIZE = 51
    TRUNC_RATIO = 0.75
    CLOSING_SIZE = 5

    # denoised = cv2.fastNlMeansDenoisingColored(img,None,10,10,7,21)
    # img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # too_bright=np.logical_and(img[:,:,1]<50, img[:,:,2]>200)
    # np.set_printoptions(threshold=np.nan)
    # np.savetxt('conconcon',img[:,:,1],'%i')
    # img[:,:,1]=np.where(too_bright, np.sqrt(img[:,:,1])+70, img[:,:,1])
    # img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR)

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.blur(gray, (BLUR_SIZE, BLUR_SIZE))

    edge = np.floor(0.5 * gray + 0.5 * (255 - blur)).astype('uint8')

    hist,bins = np.histogram(edge.flatten(), 256, [0, 256])
    cdf = hist.cumsum()
    cdf_normalized = cdf * hist.max() / cdf.max()
    cdf_m = np.ma.masked_equal(cdf, 0)
    cdf_m = (cdf_m - cdf_m.min()) * 255 / (cdf_m.max() - cdf_m.min())
    cdf = np.ma.filled(cdf_m, 0).astype('uint8')
    equ = cdf[edge]

    hist,bins = np.histogram(equ.flatten(),256,[0,256])
    max_idx = np.argmax(hist)
    hist_clean = np.where(equ > TRUNC_RATIO * max_idx, 255, equ)

    kernel = np.ones((CLOSING_SIZE, CLOSING_SIZE), np.uint8)
    closing = cv2.morphologyEx(hist_clean, cv2.MORPH_CLOSE, kernel)
    plt.imshow(closing, cmap='Greys_r')
    plt.show()
    cv2.waitKey(100)
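The histogram-equalisation step in the middle of edge_detect (a CDF built with np.histogram and used as a lookup table) also works on its own; a small self-contained sketch on a random 8-bit array, assuming only numpy.

import numpy as np

# Random 8-bit "image" with a deliberately narrow grey-level range (illustrative data).
rng = np.random.default_rng(1)
img = rng.integers(60, 120, size=(64, 64), dtype=np.uint8)

# Build the CDF of grey levels and stretch the non-empty part to 0..255.
hist, bins = np.histogram(img.flatten(), 256, [0, 256])
cdf = hist.cumsum()
cdf_m = np.ma.masked_equal(cdf, 0)
cdf_m = (cdf_m - cdf_m.min()) * 255 / (cdf_m.max() - cdf_m.min())
lut = np.ma.filled(cdf_m, 0).astype('uint8')

# Apply the CDF as a lookup table; the equalised image spans the full range.
equ = lut[img]
print(img.min(), img.max(), '->', equ.min(), equ.max())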
Example #3
 def _set_xyvals(self,val):
     data = np.ma.asarray(val).ravel()
     w=np.isinf(data.data)
     if data.mask is not np.ma.nomask:
         w = w|data.mask
     data2 = data.data[~w]
     bins = int(min(data2.size / 20, 5000))
     if bins < 3: bins=data2.size
     try:
         # for numpy 1.1, use new bins format (left and right edges)
         hist, bins = np.histogram(data2,bins=bins,
                                    range=(self.vmin,self.vmax),
                                    new=True)
     except TypeError:
         # for numpy <= 1.0 or numpy >= 1.2, no new keyword
         hist, bins = np.histogram(data2,bins=bins,
                                    range=(self.vmin,self.vmax))
     if bins.size == hist.size+1:
         # new bins format, remove last point
         bins = bins[:-1]
     hist = hist.astype(float) / float(hist.sum())
     self.yval = np.concatenate([[0.], hist.cumsum(), [1.]])
     self.xval = np.concatenate([[self.vmin],
                                 bins + 0.5 * (bins[1] - bins[0]),
                                 [self.vmax]])
Example #4
def pp_plot(f, p, nbins, ax=None):
    """ P-P plot of the empirical CDFs of values in two lists, f and p. """
    if ax is None:
        ax = plt.gca()

    unique_vals_f = list(set(f))
    unique_vals_p = list(set(p))

    combine = list(unique_vals_f)
    combine.extend(unique_vals_p)
    combine = list(set(combine))

    if len(unique_vals_f) > nbins:
        bins = nbins
    else:
        bins = sorted(combine)
        bins.append(bins[-1] + bins[-1] - bins[-2])

    ff, edges = np.histogram(f, bins=bins, density=True)
    fp, _ = np.histogram(p, bins=edges, density=True)

    Ff = np.cumsum(ff*(edges[1:]-edges[:-1]))
    Fp = np.cumsum(fp*(edges[1:]-edges[:-1]))

    ax.plot([0, 1], [0, 1], c='dodgerblue', lw=2, alpha=.8)
    ax.plot(Ff, Fp, c='black', lw=2, alpha=.9)
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1])
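A hedged usage sketch for pp_plot, assuming the function above is in scope together with numpy and matplotlib.pyplot; the sample sizes and distributions are made up.

import numpy as np
import matplotlib.pyplot as plt

rng = np.random.default_rng(42)
f = rng.normal(0.0, 1.0, 2000)   # "empirical" sample
p = rng.normal(0.3, 1.2, 2000)   # sample to compare against

fig, ax = plt.subplots(figsize=(4, 4))
pp_plot(f, p, nbins=50, ax=ax)   # the curve hugs the diagonal when the two CDFs agree
ax.set_xlabel('F_f')
ax.set_ylabel('F_p')
plt.show()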
Example #5
 def spectrum(self, shape, surface_point, bound):
     """Returns the counts histogram (bins,counts) for """
     
     wavelengths = []
     key = shape.surface_identifier(surface_point)
     if key not in self.store:
         return None
     
     entries = self.store[key]
     if len(entries) == 0:
         return None
     
     for entry in entries:
         if entry[2] == bound:
             wavelengths.append(float(entry[1]))
     
     if len(wavelengths) == 0:
         return None
     
     wavelengths = np.array(wavelengths)
     
     if len(wavelengths) == 1:
         bins = np.arange(np.floor( wavelengths[0] - 1), np.ceil(wavelengths[0] + 2))
         freq, bins  = np.histogram(wavelengths, bins=bins)
     else:
         bins = np.arange(np.floor( wavelengths.min()-1), np.ceil(wavelengths.max()+2))
         freq, bins  = np.histogram(wavelengths, bins=bins)
     return Spectrum(bins[0:-1], freq)
Example #6
    def histogram(self, idx=None, critval=None):
        '''calculate histogram values

        does not do any plotting
        '''
        if self.mcres.ndim == 2:
            if idx is not None:
                mcres = self.mcres[:,idx]
            else:
                raise ValueError('currently only 1 statistic at a time')
        else:
            mcres = self.mcres

        if critval is None:
            histo = np.histogram(mcres, bins=10)
        else:
            bins = np.asarray(critval, dtype=float)
            if bins[0] != -np.inf:
                bins = np.r_[-np.inf, bins]
            if bins[-1] != np.inf:
                bins = np.r_[bins, np.inf]
            histo = np.histogram(mcres, bins=bins)

        self.histo = histo
        self.cumhisto = np.cumsum(histo[0])*1./self.nrepl
        self.cumhistoreversed = np.cumsum(histo[0][::-1])[::-1]*1./self.nrepl
        return histo, self.cumhisto, self.cumhistoreversed
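The ±inf bin edges used above are a convenient way to count tail events directly with np.histogram; a self-contained sketch with illustrative critical values.

import numpy as np

rng = np.random.default_rng(0)
mcres = rng.standard_normal(10000)       # stand-in for Monte Carlo results

critval = np.array([-1.96, 1.96])        # illustrative two-sided critical values
bins = np.r_[-np.inf, critval, np.inf]   # [-inf, -1.96), [-1.96, 1.96), [1.96, +inf]

counts, _ = np.histogram(mcres, bins=bins)
tail_prob = (counts[0] + counts[-1]) / mcres.size
print(counts, tail_prob)                 # roughly 5% of draws land in the two tails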
Example #7
def metal_con(filename, distances, real_dist, bins=35, limits=(-3.1, 0.2),
              avgs=1, detection=1, tag="out"):
    """ main bit """
    if filename[-4:] == '.csv':  delim = ','
    else:  delim = None
    data = shift_data(fi.read_data(filename, delim), real_dist, distances[0])
    mod_actual = 5.*(ma.log10(real_dist*1000) - 1.)
    mod_new = 5.*(ma.log10(distances[0]*1000) - 1.)
    mod = mod_actual - mod_new
    print "Effective Magnitude Shift = {0}, average g={1}".format(mod, sc.mean(data[:,2]))
    new_data = cut_data(data, 4,2,3,5, deff=0, modulus=mod, full=1)
    FeH = get_photo_metal(new_data[:,4],new_data[:,2],new_data[:,3])
    ref_hist = np.histogram(FeH, bins, limits)
    hist = []
    #Also iterate over several runs and average
    for i in range(len(distances)):
        print "#- Convolving to distance {0} kpc".format(distances[i])
        if i==0:  deff=0
        else: deff=detection
        temp_hist = []
        for j in range(avgs):
            #holds dist constant, applies appropriate errors for new distance
            new_data = con.convolve(data, real_dist, distances[i])
            # shift data so detection efficiency works correctly; has no noticeable effect if deff=0
            new_data = shift_data(new_data, distances[0], distances[i])
            # apply color cuts and detection efficiency to shifted and convolved data
            new_data = cut_data(new_data, 4,2,3,5, deff=deff, modulus=None, full=0)
            print "Average g = {0}, total stars = {1}".format(sc.mean(new_data[:,2]), len(new_data[:,0]))
            FeH = get_photo_metal(new_data[:,4],new_data[:,2],new_data[:,3])
            temp_hist.append(np.histogram(FeH, bins, limits))
        new_hist = avg_hists(temp_hist)
        hist.append(new_hist)
    plot_hists(hist, ref_hist, distances, tag)
    return hist
Example #8
    def testSingletonZ(self):
        shape = (200, 100, 1)
        tags = 'xyc'
        zeros = numpy.zeros(shape)
        ones = numpy.ones(shape)
        vol = numpy.concatenate((zeros, ones), axis=2)
        vol = vigra.taggedView(vol, axistags=tags)
        assert vol.shape[vol.axistags.index('c')] == 2

        oper5d = OpThresholdTwoLevels(graph=Graph())
        oper5d.InputImage.setValue(vol)
        oper5d.MinSize.setValue(1)
        oper5d.MaxSize.setValue(zeros.size)
        oper5d.HighThreshold.setValue(.5)
        oper5d.LowThreshold.setValue(.5)
        oper5d.Channel.setValue(0)
        oper5d.CurOperator.setValue(1)

        # no smoothing
        oper5d.SmootherSigma.setValue({'x': 0.0, 'y': 0.0, 'z': 0.0})
        out5d = oper5d.Output[:].wait()
        assert numpy.all(out5d == 0), str(numpy.histogram(out5d))

        # smoothing
        oper5d.SmootherSigma.setValue({'x': 1.0, 'y': 1.0, 'z': 1.0})
        out5d = oper5d.Output[:].wait()
        assert numpy.all(out5d == 0), str(numpy.histogram(out5d))
Example #9
def test_plot():
    import math
    from numpy.random import normal
    from scipy import stats
    global data

    def f(x):
        return 2*x + 1

    mean = 2
    var = 3
    std = math.sqrt(var)

    data = normal(loc=2, scale=std, size=50000)

    d2 = f(data)
    n = stats.norm(mean, std)

    kde1 = stats.gaussian_kde(data,  bw_method='silverman')
    kde2 = stats.gaussian_kde(d2,  bw_method='silverman')
    xs = np.linspace(-10, 10, num=200)

    #plt.plot(data)
    plt.plot(xs, kde1(xs))
    plt.plot(xs, kde2(xs))
    plt.plot(xs, n.pdf(xs), color='k')

    num_bins=100
    h = np.histogram(data, num_bins, density=True)
    plt.plot(h[1][1:], h[0], lw=4)

    h = np.histogram(d2, num_bins, density=True)
    plt.plot(h[1][1:], h[0], lw=4)
Example #10
def main():
    args = _args()

    tests = {
        'squared_diff': test_squared_diff,
        'chi_square': chi_square,
        'chi_square_shape': chi_square_shape,
        'ks': kolmogorov_smirnov
    }
    if args.test_type not in tests:
        print('--test_type not found, available: {}'.format(tests.keys()))
        return

    # Create histograms with the same bins.
    if args.raw_data:
        print('Reading {}'.format(args.data_1))
        data_1 = np.loadtxt(args.data_1)
        print('Reading {}'.format(args.data_2))
        data_2 = np.loadtxt(args.data_2)
        print('Data read')
    
        if args.frames and args.raw_data:
            data_1 = data_1[:args.frames]
            data_2 = data_2[:args.frames]

        min_bins, max_bins = map(float, args.min_max.split(':'))
        bins = np.arange(min_bins, max_bins, (max_bins-min_bins)/args.bins)
        histogram_1, _ = np.histogram(data_1, bins=bins, density=False)
        histogram_2, _ = np.histogram(data_2, bins=bins, density=False)
    else:
        histogram_1 = np.loadtxt(args.data_1, usecols=(0, 1))
        histogram_2 = np.loadtxt(args.data_2, usecols=(0, 1))

    print('Running test {}'.format(args.test_type))
    tests[args.test_type](histogram_1, histogram_2)
Example #11
def generate_f_score_gate(
        neg_sample,
        pos_sample,
        chan,
        beta=1,
        theta=2,
        high=True):
    """
    given a negative and a positive sample, calculate the 'optimal' threshold gate
    position from an approximate f-score calculation
    """

    neg_hist, bins = numpy.histogram(neg_sample[:, chan], 1000, density=True)
    pos_hist, bins = numpy.histogram(pos_sample[:, chan], bins, density=True)

    xs = (bins[1:] + bins[:-1]) / 2.0

    x0 = numpy.argmax(neg_hist)

    dfa = diff_pseudo_f1(neg_hist[x0:], pos_hist[x0:], beta=beta, theta=theta)

    f_cutoff = xs[x0 + numpy.argmax(dfa)]

    if high:
        return ThresholdGate(f_cutoff, chan, 'g')
    else:
        return ThresholdGate(f_cutoff, chan, 'l')
Example #12
        def mark_lalo_anomoly(lat, lon):
            """mask pixels with abnormal values (0, etc.)
            This is found on sentinelStack multiple swath lookup table file.
            """
            # ignore pixels with zero value
            zero_mask = np.multiply(lat != 0., lon != 0.)

            # ignore anomalous non-zero values
            # by finding the most common data range (d_min, d_max) based on the histogram
            mask = np.array(zero_mask, np.bool_)
            for data in [lat, lon]:
                bin_value, bin_edge = np.histogram(data[mask], bins=10)                
                # if there is anomaly, histogram won't be evenly distributed
                while np.max(bin_value) > np.sum(zero_mask) * 0.3:
                    # find the contiguous bins containing the largest bin --> normal data range
                    bin_value_thres = ut.median_abs_deviation_threshold(bin_value, cutoff=3)
                    bin_label = ndimage.label(bin_value > bin_value_thres)[0]
                    idx = np.where(bin_label == bin_label[np.argmax(bin_value)])[0]
                    # convert to min/max data value
                    bin_step = bin_edge[1] - bin_edge[0]
                    d_min = bin_edge[idx[0]] - bin_step / 2.
                    d_max = bin_edge[idx[-1]+1] + bin_step / 2.
                    mask *= np.multiply(data >= d_min, data <= d_max)
                    bin_value, bin_edge = np.histogram(data[mask], bins=10)
            lat[mask == 0] = 90.
            lon[mask == 0] = 0.
            return lat, lon, mask
Example #13
    def hist_average_quality(self, fontsize=16, bins=None):
        """

        bins is from 0 to 94 
        """

        hq_qv = [pylab.mean([ord(X)-33 for X in read['quality'].decode()]) 
                for read in self.hq_sequence]
        lq_qv = [pylab.mean([ord(X) -33 for X in read['quality'].decode()]) 
            for read in self.lq_sequence]

        if bins is None:
            bins = range(0,94)
        Y1, X = np.histogram(hq_qv, bins=bins)
        Y2, X = np.histogram(lq_qv, bins=bins)
        pylab.bar(X[1:], Y1, width=1, label="HQ")
        pylab.bar(X[1:], Y2, bottom=Y1, width=1, label="LQ")
        pylab.xlim([0.5, 93.5])

        pylab.xlabel("Isoform average QV")
        pylab.ylabel("# Isoform")
        pylab.legend(fontsize=fontsize)

        ax = pylab.twinx()
        N = np.sum(Y1+Y2)
        ax.plot(X, [N] + list(N-np.cumsum(Y1+Y2)), "k")
Example #14
def colour_hist(data, cidx, spikes):
    pn = param_names[cidx]
    un = param_units[cidx]
    grouped_sd = {}
    grouped_md = {}
    grouped_rms = {}
    maxsd = 0
    maxmd = 0
    maxsq = 0
    for config in data:
        k = config[cidx]
        if ((len(data[config]["OU"]["spikes"]) > 0) ^ spikes):
            continue
        if k in grouped_sd:
            grouped_sd[k].append(data[config]["sd"])
            grouped_md[k].append(data[config]["md"]*1e3)
            grouped_rms[k].append(data[config]["rms"]*1e3)
        else:
            grouped_sd[k] = [data[config]["sd"]]
            grouped_md[k] = [data[config]["md"]*1e3]
            grouped_rms[k] = [data[config]["sq"]*1e3]
        maxsd = max(maxsd, data[config]["sd"])
        maxmd = max(maxmd, data[config]["md"]*1e3)
        maxsq = max(maxsq, data[config]["sq"]*1e3)

    for k in sorted(grouped_sd.keys()):
        plt.figure("Spike distance histogram")
        y, x = np.histogram(grouped_sd[k], bins=np.linspace(0, maxsd, 6), density=True)
        plt.plot(x[:-1], y, label="{} = {}".format(pn, display_in_unit(k, un)))
        plt.figure("Max difference histogram")
        y, x = np.histogram(grouped_md[k], bins=np.linspace(0, maxmd, 6), density=True)
        plt.plot(x[:-1], y, label="{} = {}".format(pn, display_in_unit(k, un)))
        plt.figure("RMSE histogram")
        y, x = np.histogram(grouped_rms[k], bins=np.linspace(0, maxsq, 6), density=True)
        plt.plot(x[:-1], y, label="{} = {}".format(pn, display_in_unit(k, un)))

    if spikes:
        sx = "spks"
    else:
        sx = "nspk"
    plt.figure("Spike distance histogram")
    plt.legend()
    plt.xlabel("SPIKE-distance")
    plt.ylabel("Number of samples")
    plt.savefig("sdhist_{}_{}.pdf".format(pn.replace("$",""), sx))
    plt.clf()

    plt.figure("Max difference histogram")
    plt.legend()
    plt.xlabel("Maximum difference (mV)")
    plt.ylabel("Number of samples")
    plt.savefig("mdhist_{}_{}.pdf".format(pn.replace("$",""), sx))
    plt.clf()

    plt.figure("RMSE histogram")
    plt.legend()
    plt.xlabel("Root mean squared error (mV)")
    plt.ylabel("Number of samples")
    plt.savefig("rmshist_{}_{}.pdf".format(pn.replace("$",""), sx))
    plt.clf()
Example #15
 def armar_vector_gris(self):
   img = cv2.imread(self.filename,0)
   equ = cv2.equalizeHist(img)
   res = np.hstack((img,equ)) #stacking images side-by-side
   hist,bins = np.histogram(img.flatten(),256,[0,256])
   histequ,binsequ = np.histogram(equ.flatten(),256,[0,256])
   return histequ
Example #16
def test_cl():
    N_bins = 75
    steps = 5000
    accuracy = 0.005

    Q_grid_B = np.linspace( -10, 10., N_bins) 
    Q_grid_SB = np.linspace( -10., 10., N_bins) 

    Qval_B = np.random.normal( -2, 1.82, 10000 )
    Qval_SB = np.random.normal( 2, 1.82, 10000 )

    hist_SB = np.histogram(Qval_SB, bins = Q_grid_SB)[0]
    hist_B =  np.histogram(Qval_B, bins = Q_grid_B)[0]

    overlap = statistic.get_cl(hist_SB, hist_B,\
                               Q_grid_SB, Q_grid_B,\
                               N_bins, steps, accuracy)

    #want per cent level agreement
    overlap = 100 * overlap
    overlap = int( overlap )

    print(overlap)
    print('12 and 14 are OK')
    assert overlap == 13 
Example #17
def createThetaHistogramBinList(theta_bins, d0_resolution_data):
    theta_histogram_bins_list = []
    for  theta, d0_resolution_datum in zip(theta_bins, d0_resolution_data ):
        if len(d0_resolution_datum) > 60:
            hist, bins = np.histogram(d0_resolution_datum, int(len(d0_resolution_datum)/60.))
            max_value = max(hist)
            max_index = hist.tolist().index(max_value)
            change = 1
            while change > 0:
                oldLen = len(d0_resolution_datum)
                left_cut = 0
                right_cut = -1
            
                try:
                    left_cut = hist[max_index:0].index(0, max_index, 0)
                except:
                    pass
            
                try:
                    right_cut = hist[max_index:].index(0, max_index)
                except:
                    pass

                hist, bins = np.histogram([x for x in d0_resolution_datum if bins[left_cut] < x < bins[right_cut]], int(len(d0_resolution_datum)/60.))
                change = len(d0_resolution_datum) - oldLen
            theta_histogram_bins_list.append((theta, hist, bins))
        else:
            print(len(d0_resolution_datum))
    return theta_histogram_bins_list
Example #18
def corner_correction(corner):
    X_l2 = []
    Y_l2 = []

    pt_indices = global_tree.query_ball_point(corner,r=30)
    for i in pt_indices:
        x = globalX_l[i]
        y = globalY_l[i]
        X_l2.append(x)
        Y_l2.append(y)

        # plt.plot(x,y,"o",color="red")

    # plt.plot(corner[0],corner[1],"o",color="green")

    x_counter, x_bins = numpy.histogram(X_l2, len(X_l2)//10)
    x_max = max(x_counter)
    x1 = int(numpy.median([(x_bins[i]+x_bins[i+1])/2. for i in range(len(x_counter)) if x_counter[i] == x_max]))
    # plt.show()
    # n, bins, patches = plt.hist(Y_l2, len(Y_l2)/10, histtype='step')
    y_counter, y_bins = numpy.histogram(Y_l2, len(Y_l2)//10)
    y_max = max(y_counter)
    y1= int(numpy.median([(y_bins[i]+y_bins[i+1])/2. for i in range(len(y_counter)) if y_counter[i] == y_max]))

    return x1,y1
Example #19
def deltanll2():
    z_values_bonly = sql("results/deltanll_hypo_bonly.db", "select hypotest__nll_sb - hypotest__nll_b from products")
    z_values_bonly = [math.sqrt(2*abs(x[0])) for x in z_values_bonly]
    z_plot_bonly = plotdata()
    z_plot_bonly.legend = "B only hypothesis"
    z_plot_bonly.color = '#0000aa'
    z_plot_bonly.set_from_nphisto(numpy.histogram(z_values_bonly, bins = 100, range = (0.0, 5.0)))
    
    z_values_sb = sql("results/deltanll_hypo.db", "select hypotest__nll_sb - hypotest__nll_b from products")
    z_values_sb = [math.sqrt(2*abs(x[0])) for x in z_values_sb]
    z_plot_sb = plotdata()
    z_plot_sb.legend = "S + B hypothesis"
    z_plot_sb.color = '#00aa00'
    z_plot_sb.set_from_nphisto(numpy.histogram(z_values_sb, bins = 100, range = (0.0, 5.0)))
    
    z_plot_bonly.fill_xrange = (median_z_est, 999.0)
    
    # determine the expected p value.
    # a. all "background only" events:
    N = len(z_values_bonly)
    # b. count "background only" events above the median_z_est for "signal + background": 
#    global median_z_est
    Np = len([x for x in z_values_bonly if x > median_z_est]) * 1.0
    # c. p-value is now very easy; convert it to a Z value:
    p = Np / N
    print "deltanll2: expected Z = %f" % p_to_Z(p)
    
    median_line = lambda ax: (ax.add_line(matplotlib.lines.Line2D([median_z_est, median_z_est], [1.0, 10000.], lw=1.0, color=z_plot_sb.color, ls='-', drawstyle='default')),
         ax.add_artist(matplotlib.text.Text(median_z_est, 10000 * 1.1, "median of S + B", horizontalalignment = "center", color=z_plot_sb.color, size='smaller')),
         ax.add_artist(matplotlib.text.Text(3.5, 80, "$N_{\mathrm{PE}} \cdot \hat p$", color=z_plot_bonly.color, size='smaller'))
          )
    plot((z_plot_bonly,z_plot_sb), '$Z_{\mathrm{est}}$', '$N_{\mathrm{PE}}$ per bin', 'results/deltanll_hypo2.pdf', log = True, ymin = 1.0, ax_modifier=median_line)
Example #20
def get_grids(train, test, outputFile = False, train_output = None, test_output = None, n = 10, m = 10, x = 'x', y = 'y'):
    if isinstance(train, str):
        train = pd.read_csv(train)
    if isinstance(test, str):
        test = pd.read_csv(test)

    # getting the cutoff values for x and y axis, using training set ONLY - because of the IMPORTANT ASSUMPTION -
    # TESTING SET IS SUBSET OF TRAINING SET IN TERMS OF X AND Y COORDINATES
    x_count, x_cutoff = np.histogram(train[x], bins = n)
    y_count, y_cutoff = np.histogram(train[y], bins = m)

    # transform cutoff values into step-wise tuples
    x_bin_tuple = [(floor, ceiling) for floor, ceiling in pairwise(x_cutoff)]
    y_bin_tuple = [(floor, ceiling) for floor, ceiling in pairwise(y_cutoff)]

    train_x_splits = split_df_rows_on_col_ranges(train, x, x_bin_tuple) # getting list of N bars based on x values for train
    test_x_splits = split_df_rows_on_col_ranges(test, x, x_bin_tuple) # getting list of N bars based on x values for test

    # within each bar (overall N) splitted based on x, there will be M splits based on y - each one is a grid
    trainDict = cut_y_bars_in_x_bar(train_x_splits, y, y_bin_tuple)
    testDict = cut_y_bars_in_x_bar(test_x_splits, y, y_bin_tuple)

    if outputFile:
        for key in trainDict:
            filename = 'train_' + 'x' + str(key[0]) + '_y' + str(key[1]) + '.csv'
            fullpath = os.path.join(train_output, filename)
            trainDict[key].to_csv(fullpath, index = False)
        for key in testDict:
            filename = 'test_' + 'x' + str(key[0]) + '_y' + str(key[1]) + '.csv'
            fullpath = os.path.join(test_output, filename)
            testDict[key].to_csv(fullpath, index = False)
    return (trainDict, testDict)
Example #21
def cartonify_image(image):
    """
    convert an input image to a cartoon-like image
    Args:
       image: input PIL image

    Returns:
        out (numpy.ndarray): A grayscale or color image of dtype uint8, with
                             the shape of image
    """

    output = np.array(image)
    x, y, c = output.shape

    # noise removal while keeping edges sharp
    for i in range(c):
        output[:, :, i] = cv2.bilateralFilter(output[:, :, i], 5, 50, 50)

    #edges in an image using the Canny algorithm
    edge = cv2.Canny(output, 100, 200)
    #convert image into RGB color space
    output = cv2.cvtColor(output, cv2.COLOR_RGB2HSV)

    # histogram array
    hists = []

    #Compute the histogram of a set of data.
    #H
    hist, _ = np.histogram(output[:, :, 0], bins=np.arange(180+1))
    hists.append(hist)
    #S
    hist, _ = np.histogram(output[:, :, 1], bins=np.arange(256+1))
    hists.append(hist)
    #V
    hist, _ = np.histogram(output[:, :, 2], bins=np.arange(256+1))
    hists.append(hist)

    centroids = []
    for h in hists:
        centroids.append(kmeans_histogram(h))
    print("centroids: {0}".format(centroids))

    output = output.reshape((-1, c))
    for i in range(c):
        channel = output[:, i]
        index = np.argmin(np.abs(channel[:, np.newaxis] - centroids[i]), axis=1)
        output[:, i] = centroids[i][index]
    output = output.reshape((x, y, c))
    output = cv2.cvtColor(output, cv2.COLOR_HSV2RGB)

    # Retrieves contours from the binary image
    # RETR_EXTERNAL: retrieves only the extreme outer contours
    # CHAIN_APPROX_NONE= stores absolutely all the contour points
    contours, _ = cv2.findContours(edge,
                                   cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_NONE)

    # Draws contours outlines
    cv2.drawContours(output, contours, -1, 0, thickness=1)
    return output
Example #22
    def visualize_performance(self):
        intra = self._intra
        inter = self._inter

        labels = [1]*len(intra) + [-1]*len(inter)
        scores = intra+inter

        self._common_visualize_performance( labels, scores)

        plt.figure()
        plt.boxplot([intra, inter])
        plt.xticks([1, 2], ['intra', 'inter'])
        plt.title('Distribution of scores')
        plt.savefig('comparison_score_distribution.pdf')


        plt.figure()
        start = min(np.min(intra), np.min(inter))
        end = max(np.max(intra), np.max(inter))
        intra_hist, intra_bin = np.histogram(intra,50, (start, end))
        inter_hist, inter_bin = np.histogram(inter,50, (start, end))


        plt.plot(intra_bin[:-1], intra_hist/float(intra_hist.sum()), label='intra', color='blue')
        plt.plot(inter_bin[:-1], inter_hist/float(inter_hist.sum()), label='inter', color='red')
        plt.legend()
        plt.xlabel('Comparison scores')
        plt.ylabel('Probability')
        plt.title('Score distribution')
Example #23
    def calculate_spectrum(self):

        if self.tardis_config.sn_distance is None:
            logger.info('Distance to supernova not selected assuming 10 pc for calculation of spectra')
            distance = units.Quantity(10, 'pc').to('cm').value
        else:
            distance = self.tardis_config.sn_distance
        self.spec_flux_nu = np.histogram(self.montecarlo_nu[self.montecarlo_nu > 0],
                                         weights=self.montecarlo_energies[self.montecarlo_energies > 0],
                                         bins=self.spec_nu_bins)[0]

        flux_scale = (self.time_of_simulation * (self.spec_nu[1] - self.spec_nu[0]) * (4 * np.pi * distance ** 2))

        self.spec_flux_nu /= flux_scale

        self.spec_virtual_flux_nu /= flux_scale

        self.spec_reabsorbed_nu = \
            np.histogram(self.montecarlo_nu[self.montecarlo_nu < 0],
                         weights=self.montecarlo_energies[self.montecarlo_nu < 0], bins=self.spec_nu_bins)[0]
        self.spec_reabsorbed_nu /= flux_scale

        self.spec_angstrom = units.Unit('Hz').to('angstrom', self.spec_nu, units.spectral())

        self.spec_flux_angstrom = (self.spec_flux_nu * self.spec_nu ** 2 / constants.c.cgs.value / 1e8)
        self.spec_reabsorbed_angstrom = (self.spec_reabsorbed_nu * self.spec_nu ** 2 / constants.c.cgs.value / 1e8)
        self.spec_virtual_flux_angstrom = (self.spec_virtual_flux_nu * self.spec_nu ** 2 / constants.c.cgs.value / 1e8)
Example #24
    def convert_3dps_to_1dps(self, k_edges):

        print "convert 3d power ot 1d power",
        print self.boxshape
        k_bin_x, k_bin_y, k_bin_z = self.get_k_bin_centre()

        k_bin_r = np.sqrt( (k_bin_x**2)[:, None, None] + 
                           (k_bin_y**2)[None, :, None] + 
                           (k_bin_z**2)[None, None, :] )

        ps_3d_flatten = copy.deepcopy(self.ps_3d.flatten())
        k_bin_r = k_bin_r.flatten()[np.isfinite(ps_3d_flatten)]
        ps_3d_flatten = ps_3d_flatten[np.isfinite(ps_3d_flatten)]

        kn_1d, edges = np.histogram(k_bin_r, k_edges)
        ps_1d, edges = np.histogram(k_bin_r, k_edges, weights=ps_3d_flatten)

        kn_1d = kn_1d.astype(float)
        #kn_1d[kn_1d==0] = np.inf
        ps_1d[kn_1d != 0] /= kn_1d[kn_1d != 0] 
        ps_1d[kn_1d == 0] = 0.
        #kn_1d[kn_1d==np.inf] = 0.

        self.kn_1d = kn_1d
        self.ps_1d = ps_1d
Example #25
def process_two_time(lev, bufno, n,
                     g12, buf, num, num_buf,noqs,qind,nopr, dly ):
    '''a function for autocor_two_time'''
    num[lev]+=1  
    if lev==0:imin=0
    else:imin= int(num_buf/2 )
    for i in range(imin, min(num[lev],num_buf) ):
        ptr=lev*int(num_buf/2)+i    
        delayno=(bufno-i)%num_buf #//cyclic buffers            
        IP=buf[lev,delayno]
        IF=buf[lev,bufno]
        I_t12 =  (np.histogram(qind, bins=noqs, weights= IF*IP))[0]
        I_t1  =  (np.histogram(qind, bins=noqs, weights= IP))[0]
        I_t2  =  (np.histogram(qind, bins=noqs, weights= IF))[0]
        tind1 = (n-1)
        tind2=(n -dly[ptr] -1)
        
        if not isinstance( n, int ):                
            nshift = 2**(lev-1)                
            for i in range( -nshift+1, nshift +1 ):
                #print tind1+i
                g12[ int(tind1 + i), int(tind2 + i) ] =I_t12/( I_t1 * I_t2) * nopr
        else:
                #print tind1
            g12[ tind1, tind2 ]  =   I_t12/( I_t1 * I_t2) * nopr       
Example #26
def make_surrogates_epochs(epochs, check_pdf=False, random_state=None):
    '''
    Make surrogate epochs using sklearn. Destroy each trial by shuffling the time points only.
    The shuffling is performed in the time domain only. The probability density function is
    preserved.

    Parameters
    ----------
    epochs : Epochs Object.
    check_pdf : Condition to test for equal probability density. (bool)
    random_state : Seed for random generator.

    Output
    ------
    Surrogate Epochs object
    '''
    from sklearn.utils import check_random_state
    rng = check_random_state(random_state)

    surrogate = epochs.copy()
    surr = surrogate.get_data()
    for trial in range(len(surrogate)):
        for channel in range(len(surrogate.ch_names)):
            order = np.argsort(rng.randn(len(surrogate.times)))
            surr[trial, channel, :] = surr[trial, channel, order]
    surrogate._data = surr

    if check_pdf:
        hist, _ = np.histogram(epochs.get_data().flatten())
        hist_surr, _ = np.histogram(surr.flatten())
        assert np.array_equal(hist, hist_surr), 'The histogram values are unequal.'

    return surrogate
Example #27
def get_DT(T,s,e): # returns the Diversity Trajectory of s,e at times T (x10 faster)
	B=np.sort(np.append(T,T[0]+1))+.0001 # the + .0001 prevents problems with identical ages
	ss1 = np.histogram(s,bins=B)[0]
	ee2 = np.histogram(e,bins=B)[0]
	DD=(ss1-ee2)[::-1]
	#return np.insert(np.cumsum(DD),0,0)[0:len(T)]
	return np.cumsum(DD)[0:len(T)] 
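A hedged usage sketch for get_DT, with made-up origination (s) and extinction (e) ages and a grid of sampling times T; only numpy is assumed, and the numbers are illustrative.

import numpy as np

rng = np.random.default_rng(3)
s = rng.uniform(0, 10, 50)                         # origination ages (older = larger)
e = np.clip(s - rng.exponential(2, 50), 0, None)   # extinction ages (younger than s)
T = np.linspace(10, 0, 21)                         # ages at which diversity is evaluated

dt = get_DT(T, s, e)   # one diversity value per entry of T
print(dt)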
Example #28
def makeCompression(X,bin_size,plotting=False):
    """
    Collects spectra frequencies
    """
    print "Compressing spectrum"
    tutto=[]
    
    for ind in X.index:
	row=[]
	row.append(ind)
	data = X.ix[ind,:].values	
	start_bins = X.ix[ind,:].values.shape[0]
	base = np.linspace(0,start_bins ,start_bins)
	bins = np.linspace(0,start_bins ,bin_size+1)

	bin_means1 = np.histogram(base, bins=bins, weights=data)[0]
	tmp = np.histogram(base, bins=bin_size)[0]
	bin_means1= bin_means1 / tmp
	for el in bin_means1:
	    row.append(el)
	tutto.append(row)

    newdf = pd.DataFrame(tutto).set_index(0)
    colnames = [ "z"+str(x) for x in xrange(newdf.shape[1]) ]
    newdf.columns=colnames
    if plotting:
	newdf.iloc[3,:].plot()
	plt.show()
    
    print(newdf.head(10))
    ###print newdf.describe()
    return(newdf)
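The core trick in makeCompression is computing per-bin means with two np.histogram calls (weighted sums divided by counts); a self-contained sketch of just that step, assuming only numpy. Using the same bins array for both calls keeps the sums and counts aligned.

import numpy as np

rng = np.random.default_rng(7)
data = rng.standard_normal(1000)        # one spectrum (illustrative)
base = np.arange(data.size)             # original channel positions
bin_size = 50                           # number of compressed channels
bins = np.linspace(0, data.size, bin_size + 1)

sums, _ = np.histogram(base, bins=bins, weights=data)  # per-bin sum of intensities
counts, _ = np.histogram(base, bins=bins)              # per-bin number of channels
bin_means = sums / counts                              # mean intensity per compressed bin
print(bin_means.shape)                                 # (50,)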
Example #29
def get_weights(target, actual, bins = 10, cap = 10, match = True):
	'''
	re-weights an actual distribution to a target.

	Args:
		target (array/list): observations drawn from target distribution
		actual (array/list): observations drawn from distribution to 
			match to the target.

		bins (numeric or list/array of numerics): bins to use to do weighting

		cap (numeric): maximum weight value.

		match (bool): whether to make the sum of weights in actual equal to the
			number of samples in target

	Returns:
		numpy.array: returns array of shape len(actual).

	'''
	target_counts, target_bins = np.histogram(target, bins=bins)
	counts, _ = np.histogram(actual, bins=target_bins)
	counts = (1.0 * counts)
	counts = np.array([max(a, 0.0001) for a in counts])
	multiplier = target_counts / counts

	weights = np.array([min(multiplier[target_bins.searchsorted(point) - 1], cap) for point in actual])

	if match:
		weights *= (len(target) / np.sum(weights))

	return weights
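A hedged usage sketch for get_weights, re-weighting a flat 'actual' sample toward a normal 'target'; numpy is assumed and all numbers are illustrative.

import numpy as np

rng = np.random.default_rng(0)
target = rng.normal(0.0, 1.0, 5000)     # distribution we want to match
actual = rng.uniform(-3.0, 3.0, 5000)   # distribution we actually sampled from

w = get_weights(target, actual, bins=20, cap=10, match=True)
print(np.average(actual, weights=w))    # weighted mean of `actual` drifts toward the target mean (~0)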
Example #30
    def limitingMag(self, raftId, ccdId):
        if hasMinuit:
            try:
                return self.limitingMagMinuit(raftId, ccdId)
            except:
                pass

        matchedStar     = num.array(self.matchedStar.get(raftId, ccdId))
        blendedStar     = num.array(self.blendedStar.get(raftId, ccdId))
        undetectedStar  = num.array(self.undetectedStar.get(raftId, ccdId))

        allStars        = num.concatenate((matchedStar, blendedStar, undetectedStar))
        foundStars      = num.concatenate((matchedStar, blendedStar))
        histAll         = num.histogram(allStars, bins=self.bins)
        histFound       = num.histogram(foundStars, bins=self.bins)

        magbins = 0.5 * (histAll[1][1:] + histAll[1][:-1])
        w       = num.where(histAll[0] != 0)
        x       = magbins[w]
        n       = 1.0 * histFound[0][w]
        d       = 1.0 * histAll[0][w]
        y       = n / d

        binsize = self.bins[1] - self.bins[0]
        x = num.append(x, x[-1] + binsize)
        y = num.append(y, 0.0)

        for i in num.arange(len(y) - 1, 1, -1):
            if y[i] <= 0.5 and y[i-1] > 0.5:
                return (0.5 - y[i-1]) / (y[i] - y[i-1]) * (x[i] - x[i-1]) + x[i-1]
        return 0.0
Example #31
def fluor_fvfm(fdark, fmin, fmax, mask, filename, bins=1000):
    """Analyze PSII camera images.

    Inputs:
    fdark       = 16-bit grayscale fdark image
    fmin        = 16-bit grayscale fmin image
    fmax        = 16-bit grayscale fmax image
    mask        = mask of plant (binary,single channel)
    filename    = name of file
    bins        = number of bins from 0 to 65,536 (default is 1000)

    Returns:
    hist_header = fvfm data table headers
    hist_data   = fvfm data table values

    :param fdark: numpy.ndarray
    :param fmin: numpy.ndarray
    :param fmax: numpy.ndarray
    :param mask: numpy.ndarray
    :param filename: str
    :param bins: int
    :return hist_header: list
    :return hist_data: list
    """

    # Auto-increment the device counter
    params.device += 1
    # Check that fdark, fmin, and fmax are grayscale (single channel)
    if not all(len(np.shape(i)) == 2 for i in [fdark, fmin, fmax]):
        fatal_error("The fdark, fmin, and fmax images must be grayscale images.")
    # Check that fdark, fmin, and fmax are 16-bit images
    if not all(i.dtype == "uint16" for i in [fdark, fmin, fmax]):
        fatal_error("The fdark, fmin, and fmax images must be 16-bit images.")

    # QC Fdark Image
    fdark_mask = cv2.bitwise_and(fdark, fdark, mask=mask)
    if np.amax(fdark_mask) > 2000:
        qc_fdark = False
    else:
        qc_fdark = True

    # Mask Fmin and Fmax Image
    fmin_mask = cv2.bitwise_and(fmin, fmin, mask=mask)
    fmax_mask = cv2.bitwise_and(fmax, fmax, mask=mask)

    # Calculate Fvariable, where Fv = Fmax - Fmin (masked)
    fv = np.subtract(fmax_mask, fmin_mask)

    # When Fmin is greater than Fmax, a negative value is returned.
    # Because the data type is unsigned integers, negative values roll over, resulting in nonsensical values
    # Wherever Fmin is greater than Fmax, set Fv to zero
    fv[np.where(fmax_mask < fmin_mask)] = 0

    # Calculate Fv/Fm (Fvariable / Fmax) where Fmax is greater than zero
    # By definition above, wherever Fmax is zero, Fvariable will also be zero
    # To calculate the divisions properly we need to change from uint16 to float64 data types
    fvfm = fv.astype(np.float64)
    fmax_flt = fmax_mask.astype(np.float64)
    fvfm[np.where(fmax_mask > 0)] /= fmax_flt[np.where(fmax_mask > 0)]

    # Calculate the median Fv/Fm value for non-zero pixels
    fvfm_median = np.median(fvfm[np.where(fvfm > 0)])

    # Calculate the histogram of Fv/Fm non-zero values
    fvfm_hist, fvfm_bins = np.histogram(fvfm[np.where(fvfm > 0)], bins, range=(0, 1))
    # fvfm_bins is a bins + 1 length list of bin endpoints, so we need to calculate bin midpoints so that
    # we have a one-to-one list of x (FvFm) and y (frequency) values.
    # To do this we add half the bin width to each lower bin edge x-value
    midpoints = fvfm_bins[:-1] + 0.5 * np.diff(fvfm_bins)

    # Calculate which non-zero bin has the maximum Fv/Fm value
    max_bin = midpoints[np.argmax(fvfm_hist)]

    # Store Fluorescence Histogram Data
    hist_header = (
        'HEADER_HISTOGRAM',
        'bin-number',
        'fvfm_bins',
        'fvfm_hist',
        'fvfm_hist_peak',
        'fvfm_median',
        'fdark_passed_qc'
    )

    hist_data = (
        'FLU_DATA',
        bins,
        np.around(midpoints, decimals=len(str(bins))).tolist(),
        fvfm_hist.tolist(),
        float(max_bin),
        float(np.around(fvfm_median, decimals=4)),
        qc_fdark
    )

    if filename:
        import matplotlib
        matplotlib.use('Agg')
        from matplotlib import pyplot as plt
        from matplotlib import cm as cm

        # Print F-variable image
        print_image(fv, (str(filename[0:-4]) + '_fv_img.png'))
        print('\t'.join(map(str, ('IMAGE', 'fv', str(filename[0:-4]) + '_fv_img.png'))))

        # Create Histogram Plot, if you change the bin number you might need to change binx so that it prints
        # an appropriate number of labels
        binx = int(bins / 50)
        plt.plot(midpoints, fvfm_hist, color='green', label='Fv/Fm')
        plt.xticks(list(midpoints[0::binx]), rotation='vertical', size='xx-small')
        plt.legend()
        ax = plt.subplot(111)
        ax.set_ylabel('Plant Pixels')
        ax.text(0.05, 0.95, ('Peak Bin Value: ' + str(max_bin)), transform=ax.transAxes, verticalalignment='top')
        plt.grid()
        plt.title('Fv/Fm of ' + str(filename[0:-4]))
        fig_name = (str(filename[0:-4]) + '_fvfm_hist.svg')
        plt.savefig(fig_name)
        plt.clf()
        print('\t'.join(map(str, ('IMAGE', 'hist', fig_name))))

        # Pseudocolored Fv/Fm image
        fvfm_8bit = fvfm * 255
        fvfm_8bit = fvfm_8bit.astype(np.uint8)
        plt.imshow(fvfm_8bit, vmin=0, vmax=1, cmap=cm.jet_r)
        plt.subplot(111)
        mask_inv = cv2.bitwise_not(mask)
        background = np.dstack((mask, mask, mask, mask_inv))
        my_cmap = plt.get_cmap('binary_r')
        plt.imshow(background, cmap=my_cmap)
        plt.axis('off')
        fig_name = (str(filename[0:-4]) + '_pseudo_fvfm.png')
        plt.savefig(fig_name, dpi=600, bbox_inches='tight')
        plt.clf()
        print('\t'.join(map(str, ('IMAGE', 'pseudo', fig_name))))

        path = os.path.dirname(filename)
        fig_name = 'FvFm_pseudocolor_colorbar.svg'
        if not os.path.isfile(path + '/' + fig_name):
            plot_colorbar(path, fig_name, 2)

    if params.debug == 'print':
        print_image(fmin_mask, os.path.join(params.debug_outdir, str(params.device) + '_fmin_mask.png'))
        print_image(fmax_mask, os.path.join(params.debug_outdir, str(params.device) + '_fmax_mask.png'))
        print_image(fv, os.path.join(params.debug_outdir, str(params.device) + '_fv_convert.png'))
    elif params.debug == 'plot':
        plot_image(fmin_mask, cmap='gray')
        plot_image(fmax_mask, cmap='gray')
        plot_image(fv, cmap='gray')

    return hist_header, hist_data
Example #32
tot = data.number.gas + data.number.bh + data.number.star
part = data.number.part
dm = part - tot
radii = data.radii.rvir


halos = []
spread = numpy.array([])
for i in range(len(dm)):   
    halos.append(distribute_particles(dm[i], radii[i]))
    spread = numpy.append(spread, halos[i].spreads)

n_bins = 100

g_hist, g_bin_edges = numpy.histogram(spread, n_bins, density = True)
g_bin_centers = numpy.zeros(len(g_hist))

for i in range(len(g_bin_centers)):
    g_bin_centers[i] = (g_bin_edges[i] + g_bin_edges[i+1])/2


dm_spread = numpy.loadtxt('/cosma5/data/durham/dc-murr1/dm_spread.txt')
gas_spread = numpy.loadtxt('/cosma5/data/durham/dc-murr1/gas_spread.txt')
disp = numpy.loadtxt('/cosma5/data/durham/dc-murr1/dm_displacements.txt')
x = numpy.loadtxt('/cosma5/data/durham/dc-murr1/ids.txt')
bound_dm_spread = dm_spread[numpy.where(x >= 0)]
unbound_dm_spread = dm_spread[numpy.where(x < 0)]


bound_dm_hist, bound_dm_bin_edges = numpy.histogram(bound_dm_spread, n_bins, density = True)
Example #33
def otsu(src_path = 'lena.jpg', dst_path = 'output.jpg'):
    # == Before applying Otsu's algorithm, it makes sense to smooth the image first ==
    # img_orig = cv2.imread(src_path)
    # width, height = img_orig.shape[1], img_orig.shape[0]
    # img_sum = np.cumsum(np.cumsum(img_orig, 0), 1)
    # img_blur = np.copy(img_orig)
    # w, h = 9, 9
    #
    # for i in range(h // 2 + 1, height - h):
    #     i1, i2 = i - (h // 2) - 1, i + (h // 2)
    #     for j in range(w // 2 + 1, width - w):
    #         j1, j2 = j - (w // 2) - 1, j + (w // 2)
    #         sum_ = img_sum[i2][j2] + img_sum[i1][j1] - img_sum[i1][j2] - img_sum[i2][j1]
    #         img_blur[i][j] = sum_ // (w * h)
    # =====================================================================
    img_orig = plt.imread(src_path)
    #img_gray = rgb2gray(img_blur)
    img_gray = rgb2gray(img_orig)
    hist = np.histogram(img_gray, bins = range(257))[0]
    min, max = 0, 0

    for i in range(256):
        if hist[i] > 0:
            min = i
            break
    for i in range(255, -1, -1):
        if hist[i] > 0:
            max = i
            break

    hist_size = max - min + 1
    num, sum_val = sum(hist), 0
    for i in range(min, max + 1):
        sum_val += hist[i] * i

    sig_max, thresh, sum1, num1 = -1, 0, 0, 0
    for i in range(min, max):
        num1 += hist[i]
        sum1 += hist[i] * i
        p1 = num1 / num
        avg1 = sum1 / num1
        avg2 = (sum_val - sum1) / (num - num1)
        sig = p1 * (1 - p1) * (avg1 - avg2) ** 2
        if sig > sig_max:
            sig_max = sig
            thresh = i

    for i in range(np.size(img_gray, 0)):
        for j in range(np.size(img_gray, 1)):
            if img_gray[i][j] <= thresh:
                img_gray[i][j] = 0
            else:
                img_gray[i][j] = 255

    fig = plt.figure(figsize=(15, 8))
    fig.add_subplot(1, 2, 1)
    plt.title('Original Image')
    plt.imshow(img_orig)
    #plt.imshow(cv2.cvtColor(img_orig, cv2.COLOR_BGR2RGB))
    fig.add_subplot(1, 2, 2)
    plt.title('Rendered Image')
    plt.imshow(img_gray, cmap='gray')
    #fig.savefig(dst_path)
    plt.show()
    plt.imsave(dst_path, img_gray, cmap='gray')
fig.savefig(name+"_match_vs_tmplttau0_vs_tmpltchi.png")

fig = Figure()
ax = fig.add_subplot(111)
collection = ax.scatter(tmplt_M, tmplt_chi, c=match, s=20, vmin=smallest_match, linewidth=0, alpha=0.5, vmax=1)
ax.set_xlabel(r"Template $M_{total}$ ($M_\odot$)")
ax.set_ylabel(r"Template $%s$" % tmplt_chi_label)
ax.set_title(r"Colorbar is Fitting Factor; assuming $f_\mathrm{low}=%d\,\mathrm{Hz}$" % flow)
ax.grid(True)
fig.colorbar(collection, ax=ax).set_label("Fitting Factor")
canvas = FigureCanvas(fig)
fig.savefig(name+"_match_vs_tmpltM_vs_tmpltchi.png")

fig = Figure()
ax = fig.add_axes((0.1, 0.1, 0.85, 0.85), xscale="log", yscale="log")
count, bin_edges = np.histogram(np.log(1 - match + 1e-5), bins=40) # fudge factor to avoid log(0)
bin_centers = np.exp(-(bin_edges[:-1] * bin_edges[1:])**0.5)
ax.plot(bin_centers, count, linewidth=2.5)
ax.plot((1 - min_match, 1 - min_match), (1, 10**ceil(log10(count.max()))), "k--")
ax.set_xlabel("mismatch (1 - fitting factor)")
ax.set_ylabel("Number")
ax.set_title(r'$N_\mathrm{inj}=%d$, 10th percentile=%.2f' % (len(match), min_match,))
ax.grid(True)
canvas = FigureCanvas(fig)
fig.savefig(name+"_match_hist.png")

fig = Figure()
ax = fig.add_subplot(111)
collection = ax.scatter(inj_M, inj_chi, c=match, s=20, vmin=smallest_match, linewidth=0, alpha=0.5, vmax=1)
ax.set_xlabel("Injected Total Mass")
ax.set_ylabel("Injected $%s$" % inj_chi_label)
Example #35
def example_pass_fails(model, train_loader, test_loader, results_dir,
                       grad_cam):
    model.eval()

    histogram = np.zeros((256))

    bins = np.linspace(0.0, 1.0, 257)

    os.makedirs(results_dir + '/imgs/', exist_ok=True)
    for batch_idx, (data, target) in enumerate(train_loader):

        data, target = Variable(data, volatile=True), Variable(target).type(
            torch.cuda.LongTensor)

        target_batch = target.data.cpu().numpy()
        image_batch = data.data.cpu().numpy()

        for img in image_batch:

            try:
                histo = np.histogram(img, bins=bins)
                histogram += histo[0]
            except KeyError:
                print('Site missing')

        if batch_idx == 0:
            print(target_batch.shape, image_batch.shape)

        try:
            for i in range(data.shape[0]):
                if target_batch[i] == 0:
                    qc_decision = 'FAIL'
                else:
                    qc_decision = 'PASS'

                plt.close()
                plt.imshow(image_batch[i, 0, :, :],
                           cmap='gray',
                           origin='lower')
                plt.axis('off')
                filename = results_dir + '/imgs/' + qc_decision + '_' + str(
                    batch_idx) + '_img_' + str(i) + '.png'
                plt.savefig(filename, bbox_inches='tight')
        except IndexError as e:
            print("Couldn't save one file")

    for batch_idx, (data, target) in enumerate(test_loader):
        data, target = Variable(data, volatile=True), Variable(target).type(
            torch.cuda.LongTensor)

        target_batch = target.data.cpu().numpy()
        image_batch = data.data.cpu().numpy()

        for img in image_batch:
            try:
                histo = np.histogram(img, bins=bins)
                histogram += histo[0]
            except KeyError:
                print('Site missing')

        if batch_idx == 0:
            print(target_batch.shape, image_batch.shape)

        try:
            for i in range(data.shape[0]):
                if target_batch[i] == 0:
                    qc_decision = 'FAIL'
                else:
                    qc_decision = 'PASS'

                    # mask = grad_cam(data[i, ...][np.newaxis, ...], target[i, ...][np.newaxis, ...])
                    #
                    # heatmap = np.uint8(cm.jet(mask)[:,:,0,:3]*255)
                    # gray = np.uint8(cm.gray(data[i, ...]))
                    #
                    # fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 4.5), tight_layout=True)
                    # ax1.imshow(image_batch[i, 0, :, :], cmap='gray', origin='lower')
                    # ax2.imshow(gray, origin='lower')
                    # ax2.imshow(heatmap, alpha=0.2, origin='lower')
                    #
                    # plt.savefig(results_dir + '/imgs/' + qc_decision + '_ds030_batch_' + str(batch_idx) + '_img_' + str(i) + '.png', bbox_inches='tight')
        except IndexError as e:
            pass

    plt.figure()
    plt.plot(bins[:-1], histogram, lw=2)

    plt.title('histogram of grey values', fontsize='24')
    plt.tight_layout()
    plt.savefig(results_dir + 'histograms.png', bbox_inches='tight')
Example #36
    def readWholeTrainSet(self, fileName, train_flag=True):
        """to read the whole train set of current dataset.
        Args:
        fileName: train set file that stores the image locations
        train_flag: True if processing training data, False for validation data

        return: 0 if successful
        """
        global_hist = np.zeros(self.classes, dtype=np.float32)

        no_files = 0
        min_val_al = 0
        max_val_al = 0
        with open(self.data_dir + '/' + fileName, 'r') as textFile:
            # with open(fileName, 'r') as textFile:
            for line in textFile:
                # we expect the text file to contain the data in following format
                # <RGB Image> <Label Image>
                img_name, msk_name, y0, y1, x0, x1 = line.split()
                x0, y0, x1, y1 = int(x0), int(y0), int(x1), int(y1)

                img_file = ((self.data_dir).strip() + '/' + img_name).strip()
                label_file = ((self.data_dir).strip() + '/' + msk_name).strip()

                label_img = cv2.imread(label_file, 0)[y0: y1, x0: x1]
                label_img[label_img > 128] = 1
                label_img[label_img < 128] = 0
                unique_values = np.unique(label_img)
                max_val = max(unique_values)
                min_val = min(unique_values)

                max_val_al = max(max_val, max_val_al)
                min_val_al = min(min_val, min_val_al)

                if train_flag == True:
                    hist = np.histogram(label_img, self.classes, [0, self.classes - 1])
                    global_hist += hist[0]

                    rgb_img = cv2.imread(img_file)[y0: y1, x0: x1]
                    self.mean[0] += np.mean(rgb_img[:, :, 0])
                    self.mean[1] += np.mean(rgb_img[:, :, 1])
                    self.mean[2] += np.mean(rgb_img[:, :, 2])

                    self.std[0] += np.std(rgb_img[:, :, 0])
                    self.std[1] += np.std(rgb_img[:, :, 1])
                    self.std[2] += np.std(rgb_img[:, :, 2])

                else:
                    print("we can only collect statistical information of train set, please check")

                if max_val > (self.classes - 1) or min_val < 0:
                    print('Labels can take value between 0 and number of classes.')
                    print('Some problem with labels. Please check. label_set:', unique_values)
                    print('Label Image ID: ' + label_file)
                no_files += 1

        # divide the mean and std values by the sample space size
        self.mean /= no_files
        self.std /= no_files
        self.mean /= 255
        self.std /= 255

        # compute the class imbalance information
        self.compute_class_weights(global_hist)
        return 0
Example #37
H0_EM, q0, j0, loglikelihood = data[:,0], data[:,1], data[:,2], data[:,3]
idx = np.argmax(loglikelihood)
H0_best, q0_best, j0_best = data[idx,0:-1]

bins = np.arange(5,150,1)

fig = plt.figure(figsize=(9,6))
ax = plt.subplot(111)
for ii, name in enumerate(data_struct.keys()):
    color_name = color_names[ii]

    kdedir = data_struct[name]["kdedir_H0"]
    plt.plot(bins, [kde_eval_single(kdedir,[d])[0] for d in bins], color = color_name, linestyle='-.',label=name, linewidth=3, zorder=10)

hist_1, bin_edges_1 = np.histogram(H0_EM, bins, density=True)
kdedir = greedy_kde_areas_1d(H0_EM)
plt.plot(bins, [kde_eval_single(kdedir,[d])[0] for d in bins], color = 'k', linestyle='-',label="Combined", linewidth=3, zorder=10)

boxes = []
planck_mu, planck_std = 67.74, 0.46 
shoes_mu, shoes_std = 74.03, 1.42
superluminal_mu, superluminal_std = 68.9, 4.6
plt.plot([planck_mu,planck_mu],[0,1],alpha=0.3, color='g',label='Planck')
rect1 = Rectangle((planck_mu - planck_std, 0), 2*planck_std, 1, alpha=0.8, color='g')
rect2 = Rectangle((planck_mu - 2*planck_std, 0), 4*planck_std, 1, alpha=0.5, color='g')
plt.plot([shoes_mu,shoes_mu],[0,1],alpha=0.3, color='r',label='SHoES')
rect3 = Rectangle((shoes_mu - shoes_std, 0), 2*shoes_std, 1, alpha=0.8, color='r')
rect4 = Rectangle((shoes_mu - 2*shoes_std, 0), 4*shoes_std, 1, alpha=0.5, color='r')
plt.plot([superluminal_mu,superluminal_mu],[0,1],alpha=0.3, color='c',label='Superluminal')
rect5 = Rectangle((superluminal_mu - superluminal_std, 0), 2*superluminal_std, 0.12, alpha=0.3, color='c')
Example #38
def pairCorrelationFunction_2D(x, y, S, rMax, dr):
    """Compute the two-dimensional pair correlation function, also known
    as the radial distribution function, for a set of circular particles
    contained in a square region of a plane.  This simple function finds
    reference particles such that a circle of radius rMax drawn around the
    particle will fit entirely within the square, eliminating the need to
    compensate for edge effects.  If no such particles exist, an error is
    returned. Try a smaller rMax...or write some code to handle edge effects! ;)
    Arguments:
        x               an array of x positions of centers of particles
        y               an array of y positions of centers of particles
        S               length of each side of the square region of the plane
        rMax            outer diameter of largest annulus
        dr              increment for increasing radius of annulus
    Returns a tuple: (g, radii, interior_indices)
        g(r)            a numpy array containing the correlation function g(r)
        radii           a numpy array containing the radii of the
                        annuli used to compute g(r)
        reference_indices   indices of reference particles
    """
    from numpy import zeros, sqrt, where, pi, mean, arange, histogram
    # Number of particles in ring/area of ring/number of reference particles/number density
    # area of ring = pi*(r_outer**2 - r_inner**2)

    # Find particles which are close enough to the box center that a circle of radius
    # rMax will not cross any edge of the box
    bools1 = x > rMax
    bools2 = x < (S - rMax)
    bools3 = y > rMax
    bools4 = y < (S - rMax)
    interior_indices, = where(bools1 * bools2 * bools3 * bools4)
    num_interior_particles = len(interior_indices)

    if num_interior_particles < 1:
        raise RuntimeError(
            "No particles found for which a circle of radius rMax\
                will lie entirely within a square of side length S.  Decrease rMax\
                or increase the size of the square.")

    edges = arange(0., rMax + 1.1 * dr, dr)
    num_increments = len(edges) - 1
    g = zeros([num_interior_particles, num_increments])
    radii = zeros(num_increments)
    numberDensity = len(x) / (S**2)

    # Compute pairwise correlation for each interior particle
    for p in range(num_interior_particles):
        index = interior_indices[p]
        d = sqrt((x[index] - x)**2 + (y[index] - y)**2)
        d[index] = 2 * rMax

        (result, bins) = histogram(d, bins=edges)
        g[p, :] = result / numberDensity

    # Average g(r) for all interior particles and compute radii
    g_average = zeros(num_increments)
    for i in range(num_increments):
        radii[i] = (edges[i] + edges[i + 1]) / 2.
        rOuter = edges[i + 1]
        rInner = edges[i]
        g_average[i] = mean(g[:, i]) / (pi * (rOuter**2 - rInner**2))

    return (g_average, radii, interior_indices)
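A hedged usage sketch for pairCorrelationFunction_2D with uniformly scattered points in a unit square (all parameters illustrative), assuming the function above is in scope; for uncorrelated points g(r) should hover around 1.

import numpy as np
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
n, S = 2000, 1.0                 # number of particles, box side length
x = rng.uniform(0, S, n)
y = rng.uniform(0, S, n)

g, radii, interior = pairCorrelationFunction_2D(x, y, S, rMax=0.2, dr=0.01)

plt.plot(radii, g)
plt.axhline(1.0, color='k', ls='--')   # expectation for an ideal (uncorrelated) gas of points
plt.xlabel('r')
plt.ylabel('g(r)')
plt.show()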
Example #39
def estimate_diff(preds, target, bw_mix=0.05, bw_pos=0.1, kde_mode='logit', threshold=None, k_neighbours=None,
                  tune=False, MT=True, MT_coef=0.2, decay_MT_coef=False, kde_type='kde',
                  n_gauss_mix=20, n_gauss_pos=10, bins_mix=20, bins_pos=20):
    """
    Estimates densities of predictions y(x) for P and U and ratio between them f_p / f_u for U sample;
        uses kernel density estimation (kde);
        post-processes difference of estimated densities - imposes monotonicity on lower preds
        (so that diff is partly non-decreasing) and applies rolling median to further reduce variance
    :param preds: predictions of NTC y(x), probability of belonging to U rather than P, np.array with shape (n,)
    :param target: binary vector, 0 if positive, 1 if unlabeled, np.array with shape (n,)
    :param bw_mix: bandwidth for kde of U
    :param bw_pos: bandwidth for kde of P
    :param kde_mode: 'prob', 'log_prob' or 'logit'; default is 'logit'
    :param threshold: monotonicity is imposed on density difference for predictions below this number, float in [0, 1]
    :param k_neighbours: difference is relaxed with median rolling window with size k_neighbours * 2 + 1,
        default = int(preds[target == 1].shape[0] // 10)

    :return: difference of densities f_p / f_u for U sample
    """

    if kde_mode is None:
        kde_mode = 'logit'

    if (threshold is None) or (threshold == 'mid'):
        threshold = preds[target == 1].mean() / 2 + preds[target == 0].mean() / 2
    elif threshold == 'low':
        threshold = preds[target == 0].mean()
    elif threshold == 'high':
        threshold = preds[target == 1].mean()

    if k_neighbours is None:
        k_neighbours = int(preds[target == 1].shape[0] // 20)

    if kde_mode == 'prob':
        kde_inner_fun = lambda x: x
        kde_outer_fun = lambda dens, x: dens(x)
    elif kde_mode == 'log_prob':
        kde_inner_fun = lambda x: np.log(x)
        kde_outer_fun = lambda dens, x: dens(np.log(x)) / (x + 10 ** -5)
    elif kde_mode == 'logit':
        kde_inner_fun = lambda x: np.log(x / (1 - x + 10 ** -5))
        kde_outer_fun = lambda dens, x: dens(np.log(x / (1 - x + 10 ** -5))) / (x * (1 - x) + 10 ** -5)

    if kde_type == 'kde':
        if tune:
            bw_mix = maximize_log_likelihood(preds[target == 1], kde_inner_fun, kde_outer_fun, kde_type=kde_type)
            bw_pos = maximize_log_likelihood(preds[target == 0], kde_inner_fun, kde_outer_fun, kde_type=kde_type)

        kde_mix = gaussian_kde(np.apply_along_axis(kde_inner_fun, 0, preds[target == 1]), bw_mix)
        kde_pos = gaussian_kde(np.apply_along_axis(kde_inner_fun, 0, preds[target == 0]), bw_pos)

    elif kde_type == 'GMM':
        if tune:
            n_gauss_mix = maximize_log_likelihood(preds[target == 1], kde_inner_fun, kde_outer_fun, kde_type=kde_type)
            n_gauss_pos = maximize_log_likelihood(preds[target == 0], kde_inner_fun, kde_outer_fun, kde_type=kde_type)

        GMM_mix = GaussianMixture(n_gauss_mix, covariance_type='spherical').fit(
            np.apply_along_axis(kde_inner_fun, 0, preds[target == 1]).reshape(-1, 1))
        GMM_pos = GaussianMixture(n_gauss_pos, covariance_type='spherical').fit(
            np.apply_along_axis(kde_inner_fun, 0, preds[target == 0]).reshape(-1, 1))

        kde_mix = lambda x: np.exp(GMM_mix.score_samples(x.reshape(-1, 1)))
        kde_pos = lambda x: np.exp(GMM_pos.score_samples(x.reshape(-1, 1)))

    elif kde_type == 'hist':
        if tune:
            bins_mix = maximize_log_likelihood(preds[target == 1], kde_inner_fun, lambda kde, x: kde(x),
                                               kde_type=kde_type)
            bins_pos = maximize_log_likelihood(preds[target == 0], kde_inner_fun, lambda kde, x: kde(x),
                                               kde_type=kde_type)
        bars_mix = np.histogram(preds[target == 1], bins=bins_mix, range=(0, 1), density=True)[0]
        bars_pos = np.histogram(preds[target == 0], bins=bins_pos, range=(0, 1), density=True)[0]

        kde_mix = lambda x: bars_mix[np.clip((x // (1 / bins_mix)).astype(int), 0, bins_mix-1)]
        kde_pos = lambda x: bars_pos[np.clip((x // (1 / bins_pos)).astype(int), 0, bins_pos-1)]
        kde_outer_fun = lambda kde, x: kde(x)

    # sorting to relax and impose monotonicity
    sorted_mixed = np.sort(preds[target == 1])

    diff = np.apply_along_axis(lambda x: kde_outer_fun(kde_pos, x) / (kde_outer_fun(kde_mix, x) + 10 ** -5), axis=0,
                               arr=sorted_mixed)
    diff[diff > 50] = 50
    diff = rolling_apply(diff, 5)
    diff = np.append(
        np.flip(np.maximum.accumulate(np.flip(diff[sorted_mixed <= threshold], axis=0)), axis=0),
        diff[sorted_mixed > threshold])
    diff = rolling_apply(diff, k_neighbours)

    if MT:
        MTrends = MonotonizingTrends(MT_coef=MT_coef)
        diff = np.flip(np.array(MTrends.monotonize_array(np.flip(diff, axis=0), reset=True, decay_MT_coef=decay_MT_coef)), axis=0)

    diff.sort()
    diff = np.flip(diff, axis=0)

    # desorting
    diff = diff[np.argsort(np.argsort(preds[target == 1]))]

    return diff
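# Hedged usage sketch (added): estimate_diff expects NTC scores and a P/U indicator and
# returns the estimated f_p / f_u for the unlabeled sample. The call assumes the
# module-level helpers it relies on (rolling_apply, maximize_log_likelihood,
# MonotonizingTrends, gaussian_kde, GaussianMixture) are importable alongside it.
import numpy as np

preds = np.clip(np.random.beta(2, 5, 2000), 1e-3, 1 - 1e-3)  # toy NTC outputs y(x)
target = np.random.binomial(1, 0.7, 2000)                    # 0 = positive, 1 = unlabeled
density_ratio = estimate_diff(preds, target, kde_type='hist')
print(density_ratio.shape)  # one value per unlabeled prediction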
Example #40
0
def pairCorrelationFunction_3D(x, y, z, S, rMax, dr):
    """Compute the three-dimensional pair correlation function for a set of
    spherical particles contained in a cube with side length S.  This simple
    function finds reference particles such that a sphere of radius rMax drawn
    around the particle will fit entirely within the cube, eliminating the need
    to compensate for edge effects.  If no such particles exist, an error is
    returned.  Try a smaller rMax...or write some code to handle edge effects! ;)
    Arguments:
        x               an array of x positions of centers of particles
        y               an array of y positions of centers of particles
        z               an array of z positions of centers of particles
        S               length of each side of the cube in space
        rMax            outer radius of the largest spherical shell
        dr              increment for increasing radius of spherical shell
    Returns a tuple: (g, radii, interior_indices)
        g(r)            a numpy array containing the correlation function g(r)
        radii           a numpy array containing the radii of the
                        spherical shells used to compute g(r)
        interior_indices     indices of the reference (interior) particles
    """
    from numpy import zeros, sqrt, where, pi, mean, arange, histogram

    # Find particles which are close enough to the cube center that a sphere of radius
    # rMax will not cross any face of the cube
    bools1 = x > rMax
    bools2 = x < (S - rMax)
    bools3 = y > rMax
    bools4 = y < (S - rMax)
    bools5 = z > rMax
    bools6 = z < (S - rMax)

    interior_indices, = where(bools1 * bools2 * bools3 * bools4 * bools5 *
                              bools6)
    num_interior_particles = len(interior_indices)

    if num_interior_particles < 1:
        raise RuntimeError(
            "No particles found for which a sphere of radius rMax "
            "will lie entirely within a cube of side length S. Decrease rMax "
            "or increase the size of the cube.")

    edges = arange(0., rMax + 1.1 * dr, dr)
    num_increments = len(edges) - 1
    g = zeros([num_interior_particles, num_increments])
    radii = zeros(num_increments)
    numberDensity = len(x) / S**3

    # Compute pairwise correlation for each interior particle
    for p in range(num_interior_particles):
        index = interior_indices[p]
        d = sqrt((x[index] - x)**2 + (y[index] - y)**2 + (z[index] - z)**2)
        d[index] = 2 * rMax

        (result, bins) = histogram(d, bins=edges, density=False)
        g[p, :] = result / numberDensity

    # Average g(r) for all interior particles and compute radii
    g_average = zeros(num_increments)
    for i in range(num_increments):
        radii[i] = (edges[i] + edges[i + 1]) / 2.
        rOuter = edges[i + 1]
        rInner = edges[i]
        g_average[i] = mean(g[:, i]) / (4.0 / 3.0 * pi *
                                        (rOuter**3 - rInner**3))

    return (g_average, radii, interior_indices)
    # Number of particles in shell/total number of particles/volume of shell/number density
    # shell volume = 4/3*pi(r_outer**3-r_inner**3)


####
Example #41
0
def create_1d_hist(fig, ax, hist, title=None, x_axis_title=None, y_axis_title=None, bins=101, x_min=None, x_max=None):
    if hist.all() is np.ma.masked:
        median = 0.0
        mean = 0.0
        rms = 0.0
    else:
        median = np.ma.median(hist)
        mean = np.ma.mean(hist)
        rms = np.ma.std(hist, dtype=np.float64)
    if x_min is None:
        x_min = 0.0
    if x_max is None:
        if hist.all() is np.ma.masked:  # check if masked array is fully masked
            x_max = 1.0
        else:
            x_max = math.ceil(hist.max())
    hist_bins = int(x_max - x_min) + 1 if bins is None else bins
    if hist_bins > 1:
        bin_width = (x_max - x_min) / (hist_bins - 1.0)
    else:
        bin_width = 1.0
    hist_range = (x_min - bin_width / 2.0, x_max + bin_width / 2.0)
    masked_hist = np.ma.masked_array(hist, copy=True)
    if masked_hist.dtype.kind in 'ui':
        masked_hist[masked_hist.mask] = np.iinfo(masked_hist.dtype).max
    elif masked_hist.dtype.kind in 'f':
        masked_hist[masked_hist.mask] = np.finfo(masked_hist.dtype).max
    else:
        raise TypeError('Inappropriate type %s' % masked_hist.dtype)
    _, _, _ = ax.hist(x=masked_hist.compressed(), bins=hist_bins, range=hist_range, align='mid')  # re-bin to 1d histogram, x argument needs to be 1D
    # BUG: np.ma.compressed(np.ma.masked_array(hist, copy=True)) (2D) is not equal to np.ma.masked_array(hist, copy=True).compressed() (1D) if hist is ndarray
    ax.set_xlim(hist_range)  # overwrite xlim
    if hist.all() is np.ma.masked:  # or np.allclose(hist, 0.0):
        ax.set_ylim((0, 1))
        ax.set_xlim((-0.5, +0.5))
    # create histogram without masked elements, higher precision when calculating gauss
    h_1d, h_bins = np.histogram(np.ma.masked_array(hist, copy=True).compressed(), bins=hist_bins, range=hist_range)
    if title is not None:
        ax.set_title(title)
    if x_axis_title is not None:
        ax.set_xlabel(x_axis_title)
    if y_axis_title is not None:
        ax.set_ylabel(y_axis_title)
    bin_centres = (h_bins[:-1] + h_bins[1:]) / 2.0
    amplitude = np.amax(h_1d)

    # defining gauss fit function
    def gauss(x, *p):
        A, mu, sigma = p
        return A * np.exp(-(x - mu) ** 2.0 / (2.0 * sigma ** 2.0))

    p0 = (amplitude, mean, rms)  # p0 is the initial guess for the fitting coefficients (A, mu and sigma above)
    try:
        coeff, _ = curve_fit(gauss, bin_centres, h_1d, p0=p0)
        hist_fit = gauss(bin_centres, *coeff)
        ax.plot(bin_centres, hist_fit, "r--", label='Gauss fit')
        chi2 = 0.0
        for i in range(0, len(h_1d)):
            chi2 += (h_1d[i] - gauss(bin_centres[i], *coeff)) ** 2.0
        textright = '$\\mu=%.2f$\n$\\sigma=%.2f$\n$\\chi^2=%.2f$' % (coeff[1], coeff[2], chi2)
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        ax.text(0.85, 0.9, textright, transform=ax.transAxes, fontsize=8, verticalalignment='top', bbox=props)
    except RuntimeError as e:
        logging.info('Plot 1d histogram: gauss fit failed, %s' % e)
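# Hedged usage sketch (added): assumes numpy, matplotlib, math, logging and
# scipy.optimize.curve_fit are imported at module level, as the function above expects.
import numpy as np
import matplotlib.pyplot as plt

occupancy = np.ma.masked_less(np.random.normal(50.0, 10.0, (80, 336)), 30.0)
fig, ax = plt.subplots()
create_1d_hist(fig, ax, occupancy, title='Occupancy', x_axis_title='Hits per pixel',
               y_axis_title='# pixels')
plt.show()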
Example #42
0
#print differences.argmin()
# threshold = 2000.0

#no_ion_iter = np.where(sums < threshold)[0].min() + 1 #cut off at first point that's below the threshold
# no_ion_iter = 1001.0
#cut the iterations and timetags arrays to when we had the ion
# iterations = iterations[np.where(iterations < iters[no_ion_iter])]
# timetags = timetags[np.where(iterations < iters[no_ion_iter])]
# print 'Completed {} iterations before losing ion'.format(iterations.max() + 1)

cxn = labrad.connect()
dv = cxn.data_vault
dv.cd(c.datavault_dir)
dv.open(1)
# start_recording_timetags = dv.get_parameter('start_recording_timetags')
# cycle_time = dv.get_parameter('timetag_record_cycle')
start_recording_timetags = 0.001166 #doppler cooling , doppler cooling repump additional, sequence start, turn of all time
cycle_time = 0.000056
bin_size = 10e-9
print(timetags.min())
print(timetags.max())
timetags = (timetags - start_recording_timetags) % cycle_time
bin_edges = int(1 + cycle_time / bin_size)
print(bin_edges)
bins = np.linspace(0, cycle_time, bin_edges)
hist = np.histogram(timetags, bins)[0]
#save the binned data
together = np.vstack((bins[:-1], hist)).transpose()
bin_filename = c.bin_filename
np.save(bin_filename, together)
print('DONE BINNING')
        plt.ylabel('Reliability')
        plt.title('Reliability of %s responses' % exp_type)
        plt.legend(handles)
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.85, box.height])
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        fig.savefig(os.path.join(PLOTS_DIR, 'clustering',
                                 'reliability', 'mouse-%s' % m.name,
                                 '%d.eps' % i),
                    bbox_inches='tight')
        plt.close()

rel_all = np.hstack([m.sc_cluster_reliability.flatten() for m in data]).flatten()
print(exp_type, np.median(rel_all))
# Histogram of reliability values.
n_bins = 50
fig = plt.figure()
fig.set_size_inches(3, 2)
x = np.linspace(np.min(rel_all), np.max(rel_all), num=n_bins+1)[:-1]
d = (x[-1] - x[0]) / (n_bins-1)
y = np.histogram(rel_all, bins=n_bins)[0]
plt.bar(x, y, d)
plt.locator_params(nbins=3)
plt.xlabel('Reliability')
plt.ylabel('Count')
plt.title('Reliability value histogram')
fig.savefig(os.path.join(PLOTS_DIR, 'clustering', 'reliability',
                         'cluster-reliability-histogram_sc-%s.eps'%exp_type),
            bbox_inches='tight')
plt.close()
def agg_num_bins(x):
    bins, _ = np.histogram(x, bins=10)
    return np.sum(bins > 0)
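# Hedged usage note (added; numpy is assumed to be imported as np in this script):
# agg_num_bins counts how many of 10 equal-width bins are populated, a cheap spread measure.
print(agg_num_bins(np.array([1, 1, 1, 2, 9, 10])))  # -> 4, values cluster at both ends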
Example #45
0
    def img_callback(self, msg):
        start_time = time.time()
        # print(len(self.tiles))
        font = cv2.FONT_HERSHEY_SIMPLEX
        color = (0, 0, 255)

        if self.frame_counter % self.skip == 0:
            # self.tiles=[]
            img = self.bridge.imgmsg_to_cv2(msg, "bgr8")

            res = img.copy()
            h, w = img.shape[:2]

            M = cv2.getRotationMatrix2D((w / 2, h / 2),
                                        (self.imu_yaw) * 180 / math.pi, 1)
            img = cv2.warpAffine(img, M, (w, h))

            #circle mask
            circle_mask = np.zeros_like(img)
            circle_mask = cv2.circle(circle_mask, (w // 2, h // 2), h // 2,
                                     [255, 255, 255], -1)
            circle_mask = circle_mask[:, :, 0]

            # print(h,w)
            img = self.img_correction(img)

            blur = cv2.GaussianBlur(img, (7, 7), 0)
            hsv = cv2.cvtColor(blur, cv2.COLOR_BGR2HSV)

            mask = cv2.adaptiveThreshold(hsv[:, :, 2],255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\
                        cv2.THRESH_BINARY,21, 2)

            kernel = np.ones((5, 5), np.uint8)
            opening = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
            opening = 255 - opening
            opening = cv2.dilate(opening, None, iterations=1)
            contour_mask = 255 - opening
            opening[circle_mask == 0] = 0
            #fit lines to extract major direction
            minLineLength = 100
            lines = cv2.HoughLinesP(image=opening,rho=1,theta=np.pi/180,\
             threshold=100,lines=np.array([]), minLineLength=minLineLength, maxLineGap=12)

            grad = np.zeros((len(lines), 1))
            i = 0
            for line in lines:
                #find two major gradients
                x1, y1, x2, y2 = line[0][0], line[0][1], line[0][2], line[0][3]
                theta = math.atan(float(y2 - y1) / (x2 - x1)) * 180 / math.pi

                grad[i] = theta
                i += 1
                # cv2.line(img, (x1, y1), (x2, y2), (0, 0, 255), 3, cv2.LINE_AA)
                cv2.line(contour_mask, (x1, y1), (x2, y2), 0, 1, cv2.LINE_AA)

            hist, bin_edges = np.histogram(grad, density=False)
            ind = np.argmax(hist)
            best_grad = round((bin_edges[ind] + bin_edges[ind + 1]) / 2, 2)

            ind = np.where(np.abs(grad - best_grad) < 10)
            good_grads = grad[ind]
            best_grad = np.mean(good_grads)

            # contour_mask=self.mask_correction(contour_mask)
            M = cv2.getRotationMatrix2D((w / 2, h / 2), best_grad, 1)
            contour_mask = cv2.warpAffine(contour_mask, M, (w, h))

            (_, contours, _) = cv2.findContours(contour_mask,
                                                cv2.RETR_EXTERNAL,
                                                cv2.CHAIN_APPROX_NONE)
            contour_mask = cv2.cvtColor(contour_mask, cv2.COLOR_GRAY2BGR)
            areas = []
            border = 0
            r = []

            for contour in contours:
                rect = cv2.boundingRect(contour)

                if rect[0] > border and rect[0] + rect[2] < w - border and rect[
                        1] > border and rect[3] + rect[1] < h - border:
                    area = int(rect[3] * rect[2])
                    # print(area)
                    ar = float(rect[2]) / rect[3]
                    real_ar = 0.25 / 0.12
                    if area > 1000 and area < 120000 and abs(ar / real_ar -
                                                             1) < 0.3:
                        cv2.rectangle(contour_mask, (rect[0], rect[1]),
                                      (rect[2] + rect[0], rect[3] + rect[1]),
                                      (0, 255, 0), 2)
                        areas.append(area)
                        r.append(rect)

            areas = np.asarray(areas)
            hist, bin_edges = np.histogram(areas, bins='fd', density=False)
            ind = np.argmax(hist)
            # best_area=(bin_edges[ind]+bin_edges[ind+1])/2

            best_area = round((bin_edges[ind] + bin_edges[ind + 1]) / 2, 2)
            ind = np.where(np.abs(areas - best_area) < 0.1 * best_area)
            if len(ind[0]) > 5:
                good_areas = areas[ind]
                best_area = np.mean(good_areas)

            pred_depth = self.predict_depth(best_area)

            pred_depth = pred_depth * math.cos(self.imu_pitch) * math.cos(
                self.imu_roll)

            for tile in self.tiles:
                r = tile.one_step_update(r)

            for rect in r:
                self.tiles.append(Tile(rect, self.ind_count))
                self.ind_count += 1

            for tile in self.tiles:
                if tile.alive == False:
                    self.tiles.remove(tile)

            del_x = []
            del_y = []
            for tile in self.tiles:
                color = self.colors[tile.ind % 20]
                if len(tile.centers) > 2:
                    del_x.append(tile.centers[-1][1] - tile.centers[-2][1])
                    del_y.append(tile.centers[-1][0] - tile.centers[-2][0])
                contour_mask = cv2.circle(
                    contour_mask,
                    (int(tile.centers[-1][0]), int(tile.centers[-1][1])), 5,
                    color, -1)
                cv2.putText(contour_mask, str(tile.ind),
                            (tile.bb[0] + 10, tile.bb[1] + 10), font, 0.8,
                            color, 1, cv2.LINE_AA)

            hist, bin_edges = np.histogram(np.asarray(del_x),
                                           bins='fd',
                                           density=False)
            ind = np.argmax(hist)
            best_del_x = round((bin_edges[ind] + bin_edges[ind + 1]) / 2, 2)

            hist, bin_edges = np.histogram(np.asarray(del_y),
                                           bins='fd',
                                           density=False)
            ind = np.argmax(hist)
            best_del_y = round((bin_edges[ind] + bin_edges[ind + 1]) / 2, 2)

            #tile real world dimension
            fov_w, fov_h = 48 * math.pi / 180, 36 * math.pi / 180
            px_W, px_H = 640, 480
            W = 2 * pred_depth * math.tan(fov_w / 2) + 0.0001
            ppm = px_W / W
            self.pos_x -= best_del_x / ppm
            self.pos_y -= best_del_y / ppm
            self.pub_odom(self.pos_x, self.pos_y, pred_depth, best_grad)

            # print(best_grad, best_area, pred_depth)
            cv2.rectangle(contour_mask, (0, 0), (w, 80), (0, 0, 0), -1)

            text = "direction " + str(
                round(best_grad + self.imu_yaw * 180 / math.pi,
                      2)) + ", height: " + str(round(pred_depth, 2)) + "m"
            text2 = "x: " + str(round(self.pos_x, 2)) + "m, y: " + str(
                round(self.pos_y, 2)) + "m"
            color = (255, 255, 255)
            cv2.putText(contour_mask, text, (50, 20), font, 0.8, color, 1,
                        cv2.LINE_AA)
            cv2.putText(contour_mask, text2, (50, 60), font, 0.8, color, 1,
                        cv2.LINE_AA)

            opening = cv2.cvtColor(opening, cv2.COLOR_GRAY2BGR)
            self.img_pub.publish(
                self.bridge.cv2_to_imgmsg(np.hstack([img, contour_mask]),
                                          "bgr8"))

        self.frame_counter += 1
Example #46
0
def smart_bin_feature(feature):
    _, bin_edges = np.histogram(feature.fillna(0), bins='fd')
    return np.digitize(feature, bin_edges.flatten(), right=True)
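# Hedged usage sketch (added): smart_bin_feature expects a pandas Series (it calls
# .fillna); the toy column below is an assumption. NaNs are treated as 0 when the
# Freedman-Diaconis edges are computed, and every row is mapped to a bin index.
import numpy as np
import pandas as pd

col = pd.Series([0.1, 0.4, 0.45, 2.0, np.nan, 3.3])
print(smart_bin_feature(col))  # one bin index per row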
Example #47
0
  get_position=["longitude", "latitude"],
  auto_highlight=True,
  radius=100,
  extruded=True,
  pickable=False,
  
  elevation_range=[0, 1000],
  ),
],
)

)

st.subheader("Breakdown by minute between %i:00 and %i:00" % (hour, (hour + 1) % 24))
filtered = data[(data[DATE_TIME].dt.hour >= hour) & (data[DATE_TIME].dt.hour < (hour + 1))]
hist = np.histogram(filtered[DATE_TIME].dt.minute, bins=60, range=(0, 60))[0]
chart_data = pd.DataFrame({"minute": range(60), "crashes": hist})

fig = px.bar(chart_data, x='minute', y='crashes', hover_data=['minute', 'crashes'], height=400)
st.write(fig)

st.header("Top 5 dangerous streets by affected class")
select = st.selectbox('Affected class', ['Pedestrians', 'Cyclists', 'Motorists'])

if select == 'Pedestrians':
	st.write(data.query("`number of pedestrians injured` >= 1")[["on street name", "number of pedestrians injured"]].sort_values(by=['number of pedestrians injured'], ascending=False).dropna(how="any")[:5])

elif select == 'Cyclists':
	st.write(data.query("`number of cyclist injured` >= 1")[["on street name", "number of cyclist injured"]].sort_values(by=['number of cyclist injured'], ascending=False).dropna(how="any")[:5])

else:
#for ix, each in enumerate([hb_contacts_vdmlevel, polar_contacts_vdmlevel]):
#    plot(1,ix,each,ix+2)
# bottom subplot, for each vdm (atomlevel)
#for ix, each in enumerate([hb_contacts_atomlevel, ca_hb_contacts_atomlevel]):
#    flat = [item for sublist in each for item in sublist]
#    plot(2,ix,flat,ix+4)
#for ix, each in enumerate([vdw_contacts_atomlevel, polar_contacts_atomlevel]):
#    flat = [item for sublist in each for item in sublist]
#    plot(3,ix,flat,ix+6)
#plt.suptitle('%s contacts - raw counts at %s A^2 cutoff' % (ifg,sasa))
#plt.show()

#### for scoring ####
# method 1) bill's way

totalhbonds = np.histogram(totalhbond,
                           bins=int(max(totalhbond)))  # list of counts starts at 0 bonds
# (same as the first bin edge); the last bin aggregates the final two bin edges
vdws = np.histogram(vdw_contacts_vdmlevel,
                    bins=int(max(vdw_contacts_vdmlevel) + 1))
polars = np.histogram(polar_contacts_vdmlevel,
                      bins=int(max(polar_contacts_vdmlevel) + 1))
scores = {'hbonds': {}, 'vdws': {}, 'polars': {}}
lbl = ['hbonds', 'vdws', 'polars']
for ix, hist in enumerate([totalhbonds]):
    #for ix, hist in enumerate([totalhbonds, vdws, polars]):
    plt.figure()
    score_dict = scores[lbl[ix]]
    N = np.cumsum(hist[0])
    N = sum([x <= 0.95 * sum(hist[0]) for x in N])
    for count, bond_num in zip(hist[0], hist[1][:-1]):
Example #49
0
def binned(unbinned):
    mle, x = unbinned
    nx, xe = np.histogram(x, bins=50, range=(-3, 3))
    return mle, nx, xe
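# Hedged usage sketch (added): `unbinned` is assumed to be a (mle, samples) pair; the
# helper just attaches 50-bin counts over [-3, 3] so downstream code can fit binned data.
import numpy as np

mle_hat, counts, edges = binned((0.0, np.random.normal(size=10000)))
print(counts.sum(), len(edges))  # samples outside [-3, 3] are dropped; 50 bins -> 51 edges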
Example #50
0
''' Find the top 10 best-selling platforms and drop the rest. '''
top10SalesPlatform = vgs_data.groupby('Platform', as_index = False).agg({'Global_Sales':'sum'}).sort_values('Global_Sales', ascending = False)
drop_columns = top10SalesPlatform['Platform'].tail(21).to_numpy()

''' Sales between 1980 and 1990 were very low, so we exclude those years from the chart. '''
drop_rows = range(1980, 1990)

vgs_data_grouped.drop(drop_columns, axis=1, inplace = True)
vgs_data_grouped.drop(['GBA', 'PC', 'X360', 'PSP','Wii','DS','PS'], axis=1, inplace = True)
vgs_data_grouped.drop(drop_rows, axis=0, inplace = True)
vgs_data_grouped2 = vgs_data_grouped.fillna(0) # If necessary 
print(vgs_data_grouped)

''' It's finally time to plot the data. '''
count, bin_edges = np.histogram(vgs_data_grouped2, 10)
xmin = bin_edges[0] - 10   #  first bin value is 31.0, adding buffer of 10 for aesthetic purposes 
xmax = bin_edges[-1] + 10  #  last bin value is 308.0, adding buffer of 10 for aesthetic purposes

# stacked Histogram
vgs_data_grouped.plot(kind='hist',
          figsize=(15, 9), 
          bins=10,
          xticks=bin_edges,
          alpha=0.6,
          color=['coral', 'darkslateblue', 'mediumseagreen'],
          stacked=False,
          xlim=(xmin, xmax)
         )

plt.title('Histogram of PS Game Sales Over the Years', color = 'red', fontsize = 20)
Example #51
0
 def test_goodness_of_fit_2(self):
     vals = self.generate(3, (5, 20), True, [0.3, 0.3, 0.4]).get()
     counts = numpy.histogram(vals, bins=numpy.arange(4))[0]
     expected = numpy.array([30, 30, 40])
     assert _hypothesis.chi_square_test(counts, expected)
Example #52
0
def main(folding_id, inliner_classes, total_classes, folds=5):
    batch_size = 64
    mnist_train = []
    mnist_valid = []
    z_size = 32

    def shuffle_in_unison(a, b):
        assert len(a) == len(b)
        shuffled_a = np.empty(a.shape, dtype=a.dtype)
        shuffled_b = np.empty(b.shape, dtype=b.dtype)
        permutation = np.random.permutation(len(a))
        for old_index, new_index in enumerate(permutation):
            shuffled_a[new_index] = a[old_index]
            shuffled_b[new_index] = b[old_index]
        return shuffled_a, shuffled_b

    outlier_classes = []
    for i in range(total_classes):
        if i not in inliner_classes:
            outlier_classes.append(i)

    for i in range(folds):
        if i != folding_id:
            with open('data_fold_%d.pkl' % i, 'rb') as pkl:
                fold = pickle.load(pkl)
            if len(mnist_valid) == 0:
                mnist_valid = fold
            else:
                mnist_train += fold

    with open('data_fold_%d.pkl' % folding_id, 'rb') as pkl:
        mnist_test = pickle.load(pkl)

    #keep only train classes
    mnist_train = [x for x in mnist_train if x[0] in inliner_classes]

    random.seed(0)
    random.shuffle(mnist_train)

    def list_of_pairs_to_numpy(l):
        return np.asarray([x[1] for x in l],
                          np.float32), np.asarray([x[0] for x in l], np.int64)

    print("Train set size:", len(mnist_train))

    mnist_train_x, mnist_train_y = list_of_pairs_to_numpy(mnist_train)

    G = Generator(z_size).to(device)
    E = Encoder(z_size).to(device)
    setup(E)
    setup(G)
    G.eval()
    E.eval()

    G.load_state_dict(torch.load("Gmodel.pkl"))
    E.load_state_dict(torch.load("Emodel.pkl"))

    sample = torch.randn(64, z_size).to(device)
    sample = G(sample.view(-1, z_size, 1, 1)).cpu()
    save_image(sample.view(64, 1, 32, 32), 'sample.png')

    if True:
        zlist = []
        rlist = []

        for it in range(len(mnist_train_x) // batch_size):
            x = Variable(extract_batch(mnist_train_x, it,
                                       batch_size).view(-1, 32 * 32).data,
                         requires_grad=True)
            z = E(x.view(-1, 1, 32, 32))
            recon_batch = G(z)
            z = z.squeeze()

            recon_batch = recon_batch.squeeze().cpu().detach().numpy()
            x = x.squeeze().cpu().detach().numpy()

            z = z.cpu().detach().numpy()

            for i in range(batch_size):
                distance = np.sum(
                    np.power(recon_batch[i].flatten() - x[i].flatten(), power))
                rlist.append(distance)

            zlist.append(z)

        data = {}
        data['rlist'] = rlist
        data['zlist'] = zlist

        with open('data.pkl', 'wb') as pkl:
            pickle.dump(data, pkl)

    with open('data.pkl', 'rb') as pkl:
        data = pickle.load(pkl)

    rlist = data['rlist']
    zlist = data['zlist']

    counts, bin_edges = np.histogram(rlist, bins=30, density=True)

    plt.plot(bin_edges[1:], counts, linewidth=2)
    plt.xlabel(r"Distance, $\left \|\| I - \hat{I} \right \|\|$",
               fontsize=axis_title_size)
    plt.ylabel('Probability density', fontsize=axis_title_size)
    plt.title(
        r"PDF of distance for reconstruction error, $p\left(\left \|\| I - \hat{I} \right \|\| \right)$",
        fontsize=title_size)
    plt.grid(True)
    plt.xticks(fontsize=ticks_size)
    plt.yticks(fontsize=ticks_size)
    plt.tight_layout(rect=(0.0, 0.0, 1, 0.95))
    plt.savefig('mnist_d%d_randomsearch.pdf' % inliner_classes[0])
    plt.savefig('mnist_d%d_randomsearch.eps' % inliner_classes[0])
    plt.clf()
    plt.cla()
    plt.close()

    def r_pdf(x, bins, count):
        if x < bins[0]:
            return max(count[0], 1e-308)
        if x >= bins[-1]:
            return max(count[-1], 1e-308)
        id = np.digitize(x, bins) - 1
        return max(count[id], 1e-308)

    zlist = np.concatenate(zlist)
    for i in range(z_size):
        plt.hist(zlist[:, i], bins='auto', histtype='step')

    plt.xlabel(r"$z$", fontsize=axis_title_size)
    plt.ylabel('Probability density', fontsize=axis_title_size)
    plt.title(r"PDF of embeding $p\left(z \right)$", fontsize=title_size)
    plt.grid(True)
    plt.xticks(fontsize=ticks_size)
    plt.yticks(fontsize=ticks_size)
    plt.tight_layout(rect=(0.0, 0.0, 1, 0.95))
    plt.savefig('mnist_d%d_embeding.pdf' % inliner_classes[0])
    plt.savefig('mnist_d%d_embeding.eps' % inliner_classes[0])
    plt.clf()
    plt.cla()
    plt.close()

    gennorm_param = np.zeros([3, z_size])
    for i in range(z_size):
        betta, loc, scale = scipy.stats.gennorm.fit(zlist[:, i])
        gennorm_param[0, i] = betta
        gennorm_param[1, i] = loc
        gennorm_param[2, i] = scale

    def compute_threshold(mnist_valid, percentage):
        #############################################################################################
        # Searching for threshold on validation set
        random.shuffle(mnist_valid)
        mnist_valid_outlier = [
            x for x in mnist_valid if x[0] in outlier_classes
        ]
        mnist_valid_inliner = [
            x for x in mnist_valid if x[0] in inliner_classes
        ]

        inliner_count = len(mnist_valid_inliner)
        outlier_count = inliner_count * percentage // (100 - percentage)

        if len(mnist_valid_outlier) > outlier_count:
            mnist_valid_outlier = mnist_valid_outlier[:outlier_count]
        else:
            outlier_count = len(mnist_valid_outlier)
            inliner_count = outlier_count * (100 - percentage) // percentage
            mnist_valid_inliner = mnist_valid_inliner[:inliner_count]

        _mnist_valid = mnist_valid_outlier + mnist_valid_inliner
        random.shuffle(_mnist_valid)

        mnist_valid_x, mnist_valid_y = list_of_pairs_to_numpy(_mnist_valid)

        result = []
        novel = []

        for it in range(len(mnist_valid_x) // batch_size):
            x = Variable(extract_batch(mnist_valid_x, it,
                                       batch_size).view(-1, 32 * 32).data,
                         requires_grad=True)
            label = extract_batch(mnist_valid_y, it, batch_size)

            z = E(x.view(-1, 1, 32, 32))
            recon_batch = G(z)
            z = z.squeeze()

            J = compute_jacobian(x, z)
            J = J.cpu().numpy()
            z = z.cpu().detach().numpy()

            recon_batch = recon_batch.squeeze().cpu().detach().numpy()
            x = x.squeeze().cpu().detach().numpy()

            for i in range(batch_size):
                u, s, vh = np.linalg.svd(J[i, :, :], full_matrices=False)
                logD = np.sum(np.log(np.abs(s)))  # | \mathrm{det} S^{-1} |

                p = scipy.stats.gennorm.pdf(z[i], gennorm_param[0, :],
                                            gennorm_param[1, :],
                                            gennorm_param[2, :])
                logPz = np.sum(np.log(p))

                # Sometimes, due to rounding some element in p may be zero resulting in Inf in logPz
                # In this case, just assign some large negative value to make sure that the sample
                # is classified as unknown.
                if not np.isfinite(logPz):
                    logPz = -1000

                distance = np.sum(
                    np.power(x[i].flatten() - recon_batch[i].flatten(), power))

                logPe = np.log(
                    r_pdf(distance, bin_edges,
                          counts))  # p_{\|W^{\perp}\|} (\|w^{\perp}\|)
                logPe -= np.log(distance) * (32 * 32 - z_size
                                             )  # \| w^{\perp} \|}^{m-n}

                P = logD + logPz + logPe

                result.append(P)
                novel.append(label[i].item() in inliner_classes)

        result = np.asarray(result, dtype=np.float32)
        novel = np.asarray(novel, dtype=np.float32)

        minP = min(result) - 1
        maxP = max(result) + 1

        best_e = 0
        best_f = 0
        best_e_ = 0
        best_f_ = 0

        not_novel = np.logical_not(novel)

        for e in np.arange(minP, maxP, 0.1):
            y = np.greater(result, e)

            true_positive = np.sum(np.logical_and(y, novel))
            false_positive = np.sum(np.logical_and(y, not_novel))
            false_negative = np.sum(np.logical_and(np.logical_not(y), novel))

            if true_positive > 0:
                f = GetF1(true_positive, false_positive, false_negative)
                if f > best_f:
                    best_f = f
                    best_e = e
                if f >= best_f_:
                    best_f_ = f
                    best_e_ = e

        best_e = (best_e + best_e_) / 2.0

        print("Best e: ", best_e)
        return best_e

    def test(mnist_test, percentage, e):
        true_positive = 0
        true_negative = 0
        false_positive = 0
        false_negative = 0

        random.shuffle(mnist_test)
        mnist_test_outlier = [x for x in mnist_test if x[0] in outlier_classes]
        mnist_test_inliner = [x for x in mnist_test if x[0] in inliner_classes]

        inliner_count = len(mnist_test_inliner)
        outlier_count = inliner_count * percentage // (100 - percentage)

        if len(mnist_test_outlier) > outlier_count:
            mnist_test_outlier = mnist_test_outlier[:outlier_count]
        else:
            outlier_count = len(mnist_test_outlier)
            inliner_count = outlier_count * (100 - percentage) // percentage
            mnist_test_inliner = mnist_test_inliner[:inliner_count]

        mnist_test = mnist_test_outlier + mnist_test_inliner
        random.shuffle(mnist_test)

        mnist_test_x, mnist_test_y = list_of_pairs_to_numpy(mnist_test)

        count = 0

        result = []

        for it in range(len(mnist_test_x) // batch_size):
            x = Variable(extract_batch(mnist_test_x, it,
                                       batch_size).view(-1, 32 * 32).data,
                         requires_grad=True)
            label = extract_batch(mnist_test_y, it, batch_size)

            z = E(x.view(-1, 1, 32, 32))
            recon_batch = G(z)
            z = z.squeeze()

            J = compute_jacobian(x, z)

            J = J.cpu().numpy()

            z = z.cpu().detach().numpy()

            recon_batch = recon_batch.squeeze().cpu().detach().numpy()
            x = x.squeeze().cpu().detach().numpy()

            for i in range(batch_size):
                u, s, vh = np.linalg.svd(J[i, :, :], full_matrices=False)
                logD = np.sum(np.log(np.abs(s)))

                p = scipy.stats.gennorm.pdf(z[i], gennorm_param[0, :],
                                            gennorm_param[1, :],
                                            gennorm_param[2, :])
                logPz = np.sum(np.log(p))

                # Sometimes, due to rounding some element in p may be zero resulting in Inf in logPz
                # In this case, just assign some large negative value to make sure that the sample
                # is classified as unknown.
                if not np.isfinite(logPz):
                    logPz = -1000

                distance = np.sum(
                    np.power(x[i].flatten() - recon_batch[i].flatten(), power))

                logPe = np.log(r_pdf(distance, bin_edges, counts))
                logPe -= np.log(distance) * (32 * 32 - z_size)

                count += 1

                P = logD + logPz + logPe

                if (label[i].item() in inliner_classes) != (P > e):
                    if not label[i].item() in inliner_classes:
                        false_positive += 1
                    if label[i].item() in inliner_classes:
                        false_negative += 1
                else:
                    if label[i].item() in inliner_classes:
                        true_positive += 1
                    else:
                        true_negative += 1

                result.append(((label[i].item() in inliner_classes), P))

        error = 100 * (true_positive + true_negative) / count

        y_true = [x[0] for x in result]
        y_scores = [x[1] for x in result]

        try:
            auc = roc_auc_score(y_true, y_scores)
        except:
            auc = 0

        with open('result_d%d_p%d.pkl' % (inliner_classes[0], percentage),
                  'wb') as output:
            pickle.dump(result, output)

        print("Percentage ", percentage)
        print("Error ", error)
        f1 = GetF1(true_positive, false_positive, false_negative)
        print("F1 ", GetF1(true_positive, false_positive, false_negative))
        print("AUC ", auc)

        #inliers
        X1 = [x[1] for x in result if x[0]]

        #outliers
        Y1 = [x[1] for x in result if not x[0]]

        minP = min([x[1] for x in result]) - 1
        maxP = max([x[1] for x in result]) + 1

        ##################################################################
        # FPR at TPR 95
        ##################################################################
        fpr95 = 0.0
        clothest_tpr = 1.0
        dist_tpr = 1.0
        for e in np.arange(minP, maxP, 0.2):
            tpr = np.sum(np.greater_equal(X1, e)) / float(len(X1))
            fpr = np.sum(np.greater_equal(Y1, e)) / float(len(Y1))
            if abs(tpr - 0.95) < dist_tpr:
                dist_tpr = abs(tpr - 0.95)
                clothest_tpr = tpr
                fpr95 = fpr

        print("tpr: ", clothest_tpr)
        print("fpr95: ", fpr95)

        ##################################################################
        # Detection error
        ##################################################################
        error = 1.0
        for e in np.arange(minP, maxP, 0.2):
            tpr = np.sum(np.less(X1, e)) / float(len(X1))
            fpr = np.sum(np.greater_equal(Y1, e)) / float(len(Y1))
            error = np.minimum(error, (tpr + fpr) / 2.0)

        print("Detection error: ", error)

        ##################################################################
        # AUPR IN
        ##################################################################
        auprin = 0.0
        recallTemp = 1.0
        for e in np.arange(minP, maxP, 0.2):
            tp = np.sum(np.greater_equal(X1, e))
            fp = np.sum(np.greater_equal(Y1, e))
            if tp + fp == 0:
                continue
            precision = tp / (tp + fp)
            recall = tp / float(len(X1))
            auprin += (recallTemp - recall) * precision
            recallTemp = recall
        auprin += recall * precision

        print("auprin: ", auprin)

        ##################################################################
        # AUPR OUT
        ##################################################################
        minP, maxP = -maxP, -minP
        X1 = [-x for x in X1]
        Y1 = [-x for x in Y1]
        auprout = 0.0
        recallTemp = 1.0
        for e in np.arange(minP, maxP, 0.2):
            tp = np.sum(np.greater_equal(Y1, e))
            fp = np.sum(np.greater_equal(X1, e))
            if tp + fp == 0:
                continue
            precision = tp / (tp + fp)
            recall = tp / float(len(Y1))
            auprout += (recallTemp - recall) * precision
            recallTemp = recall
        auprout += recall * precision

        print("auprout: ", auprout)

        with open(os.path.join("results.txt"), "a") as file:
            file.write("Class: %d\n Percentage: %d\n"
                       "Error: %f\n F1: %f\n AUC: %f\nfpr95: %f"
                       "\nDetection: %f\nauprin: %f\nauprout: %f\n\n" %
                       (inliner_classes[0], percentage, error, f1, auc, fpr95,
                        error, auprin, auprout))

        return auc, f1, fpr95, error, auprin, auprout

    percentages = [10, 20, 30, 40, 50]

    results = {}

    for p in percentages:
        e = compute_threshold(mnist_valid, p)
        results[p] = test(mnist_test, p, e)

    return results
Example #53
0
    def pt_njet_reweight(self, bkg_proc, year, presel, norm_first=True):
        """
        Derive a reweighting for a single bkg process in a m(ee) control region around the Z-peak, double differentially 
        in bins on pT(ee) and nJets, to map bkg process to Data. Then apply this in the signal region.

        Arguments
        ---------
        bkg_proc: string
            name of the physics process we want to re-weight. Nominally this is for Drell-Yan.
        year: float
            year to be re-weighted (perform this separately for each year)
        presel: string
            preselection to apply to go from the CR -> SR
        norm_first: bool
            normalise the simulated background to data. Results in a shape-only correction
        """

        #can remove this once nJets is put in ntuples from dumper
        outcomes_mc_bkg = [ self.mc_df_bkg['leadJetPt'].lt(0),
                            self.mc_df_bkg['leadJetPt'].gt(0) & self.mc_df_bkg['subleadJetPt'].lt(0), 
                            self.mc_df_bkg['leadJetPt'].gt(0) & self.mc_df_bkg['subleadJetPt'].gt(0)
                          ]

        outcomes_data   = [ self.data_df['leadJetPt'].lt(0),
                            self.data_df['leadJetPt'].gt(0) & self.data_df['subleadJetPt'].lt(0), 
                            self.data_df['leadJetPt'].gt(0) & self.data_df['subleadJetPt'].gt(0)
                          ]
        jets    = [0, 1, 2] # 2 really means nJet >= 2

        self.mc_df_bkg['nJets'] = np.select(outcomes_mc_bkg, jets) 
        self.data_df['nJets'] = np.select(outcomes_data, jets) 

        #apply re-weighting
        pt_bins = np.linspace(0,200,101)
        jet_bins = [0,1,2]
        n_jets_to_sfs_map = {}


        #derive pt and njet based SFs
        for n_jets in jet_bins:
            if not n_jets==jet_bins[-1]: 
                bkg_df = self.mc_df_bkg.query('proc=="{}" and year=="{}" and dielectronMass>80 and dielectronMass<100 and nJets=={}'.format(bkg_proc,year, n_jets))
                data_df = self.data_df.query('year=="{}" and dielectronMass>80 and dielectronMass<100 and nJets=={}'.format(year,n_jets))       
            else: 
                bkg_df = self.mc_df_bkg.query('proc=="{}" and year=="{}" and dielectronMass>80 and dielectronMass<100 and nJets>={}'.format(bkg_proc,year, n_jets))
                data_df = self.data_df.query('year=="{}" and dielectronMass>80 and dielectronMass<100 and nJets>={}'.format(year,n_jets))       

            if norm_first:
                CR_norm_i_jet_bin = (np.sum(data_df['weight'])/np.sum(bkg_df['weight']))
                bkg_df['weight'] *= CR_norm_i_jet_bin

            bkg_pt_binned, _ = np.histogram(bkg_df['dielectronPt'], bins=pt_bins, weights=bkg_df['weight'])
            data_pt_binned, bin_edges = np.histogram(data_df['dielectronPt'], bins=pt_bins)
            n_jets_to_sfs_map[n_jets] = data_pt_binned/bkg_pt_binned

        #now apply the proc targeting selection on all dfs, and re-save. Then apply derived SFs
        self.apply_more_cuts(presel)
        if norm_first:
            SR_i_jet_to_norm = {}
            for n_jets in jet_bins:
                SR_i_jet_to_norm[n_jets] = np.sum(self.data_df['weight']) / np.sum(self.mc_df_bkg['weight'])
            self.mc_df_bkg['weight'] = self.mc_df_bkg.apply(self.pt_njet_reweight_helper, axis=1, args=[bkg_proc, year, bin_edges, n_jets_to_sfs_map, True, SR_i_jet_to_norm])

        else: self.mc_df_bkg['weight'] = self.mc_df_bkg.apply(self.pt_njet_reweight_helper, axis=1, args=[bkg_proc, year, bin_edges, n_jets_to_sfs_map, True, None])
        self.save_modified_dfs(year)
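# Hedged standalone illustration (added, not part of the class above): the heart of the
# reweighting is a per-pT-bin data/MC scale factor; all names below are synthetic
# stand-ins rather than the class attributes or ntuple branches.
import numpy as np

pt_bins = np.linspace(0, 200, 101)
mc_pt, mc_w = np.random.exponential(40.0, 20000), np.full(20000, 0.8)
data_pt = np.random.exponential(45.0, 20000)

mc_counts, _ = np.histogram(mc_pt, bins=pt_bins, weights=mc_w)
data_counts, edges = np.histogram(data_pt, bins=pt_bins)
sfs = np.divide(data_counts, mc_counts,
                out=np.ones_like(data_counts, dtype=float), where=mc_counts > 0)

# each MC event picks up the scale factor of the pT bin it falls into
idx = np.clip(np.digitize(mc_pt, edges) - 1, 0, len(sfs) - 1)
mc_w_corrected = mc_w * sfs[idx]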
Example #54
0
def equalize(f):
    h = np.histogram(f, bins=np.arange(2**16))[0]
    H = np.cumsum(h) / float(np.sum(h))
    e = np.floor(H[f.flatten().astype(np.uint16)] * (2**16 - 1))
    return e.reshape(f.shape)
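# Hedged usage sketch (added): equalize() performs histogram equalization for 16-bit
# data via the cumulative distribution; the synthetic low-contrast frame is an assumption.
import numpy as np

frame = np.random.randint(1000, 3000, size=(64, 64), dtype=np.uint16)
eq = equalize(frame)
print(frame.min(), frame.max(), '->', eq.min(), eq.max())  # dynamic range is stretched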
Example #55
0
# just for display:
# make a copy of the image, call it masked_image, and
# use np.logical_not() and indexing to apply the mask to it
# WRITE YOUR CODE HERE

# create a new window and display masked_image, to verify the
# validity of your mask
# WRITE YOUR CODE HERE

# list to select colors of each channel line
colors = ("r", "g", "b")
channel_ids = (0, 1, 2)

# create the histogram plot, with three lines, one for
# each color
plt.xlim([0, 256])
for channel_id, c in zip(channel_ids, colors):
    # change this to use your circular mask to apply the histogram
    # operation to the 7th well of the first row
    # MODIFY CODE HERE
    histogram, bin_edges = np.histogram(image[:, :, channel_id],
                                        bins=256,
                                        range=(0, 256))

    plt.plot(bin_edges[0:-1], histogram, color=c)

plt.xlabel("color value")
plt.ylabel("pixel count")

plt.show()
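# Hedged sketch (added): one way to build the circular mask referred to in the comments
# above; the well centre and radius are placeholders, and `image` is the RGB array
# already loaded by this script.
cy, cx, radius = 120, 1050, 49  # placeholder position/size of the 7th well in the first row
yy, xx = np.ogrid[:image.shape[0], :image.shape[1]]
well_mask = (yy - cy) ** 2 + (xx - cx) ** 2 <= radius ** 2
# per-channel histogram restricted to the masked pixels, e.g.:
# histogram, bin_edges = np.histogram(image[:, :, channel_id][well_mask],
#                                     bins=256, range=(0, 256))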
Example #56
0
 def test_goodness_of_fit_2(self):
     mx = 5
     vals = self.generate(mx, (5, 5)).get()
     counts = numpy.histogram(vals, bins=numpy.arange(mx + 2))[0]
     expected = numpy.array([float(vals.size) / (mx + 1)] * (mx + 1))
     assert _hypothesis.chi_square_test(counts, expected)
Example #57
0
#! usr/bin/python
#coding=utf-8

# import numpy as np
import cv2
import numpy as np
from matplotlib import pyplot as plt

# Load a color image in grayscale
img = cv2.imread('../pxp_blue.jpg', 0)
# OpenCV method - cv2.calcHist (fastest)
# args: image, channel ([0] for grayscale), mask (None), number of bins, pixel value range
hist_cv = cv2.calcHist([img], [0], None, [256], [0, 256])
# NumPy method - np.histogram()
hist_np, bins = np.histogram(img.ravel(), 256, [0, 256])
# another NumPy method - np.bincount() (roughly 10x faster than method 2)
hist_np2 = np.bincount(img.ravel(), minlength=256)
plt.subplot(221), plt.imshow(img, 'gray')
plt.subplot(222), plt.plot(hist_cv)
plt.subplot(223), plt.plot(hist_np)
plt.subplot(224), plt.plot(hist_np2)
plt.show()
Example #58
0
    return phil_list,et_list,s1_list

#%%
# ========================== MCMC sampling ============================== 
varnames, bounds = get_var_bounds(MODE)
scale = bounds[2]
idx_sigma_vod = varnames.index('sigma_vod')
valid_vod = ~np.isnan(VOD_ma); VOD_ma_valid = VOD_ma[valid_vod]

idx_sigma_et = varnames.index('sigma_et')
valid_et = ~np.isnan(ET); ET_valid = ET[valid_et]

idx_sigma_sm = varnames.index('sigma_sm')
valid_sm = ~np.isnan(SOILM); SOILM_valid = SOILM[valid_sm]
bins = np.arange(0,1.02,0.01)
counts, bin_edges = np.histogram(SOILM_valid, bins=bins, density=True)
cdf1 = np.cumsum(counts)/sum(counts)

Nobs = sum(valid_vod)+sum(valid_et)+sum(valid_sm)
    

def Gaussian_loglik(theta0):
    theta = theta0*scale
    PSIL_hat,ET_hat,SM_hat = runhh_2soil_hydro(theta)
    ET_hat = hour2week(ET_hat,UNIT=24)[~discard_et][valid_et] # mm/hr -> mm/day
    dPSIL = hour2day(PSIL_hat,idx)[~discard_vod]
    VOD_hat = fitVOD_RMSE(dPSIL,dLAI,VOD_ma)[valid_vod]
    SM_hat = hour2day(SM_hat,idx)[~discard_vod][::2][valid_sm]
    
    sigma_VOD, sigma_ET,sigma_SM = (theta[idx_sigma_vod], theta[idx_sigma_et],theta[idx_sigma_sm])
    loglik_vod = np.nanmean(norm.logpdf(VOD_ma_valid,VOD_hat,sigma_VOD))
Example #59
0
    def __init__(self, parent=None):
        self.parent = parent

        #############################
        ## Dock: ROI histogram
        #############################
        self.dock = Dock("ROI Histogram", size=(1, 1))
        self.win = pg.PlotWidget(title="ROI histogram")
        hist, bin = np.histogram(np.random.random(1000), bins=1000)
        self.win.plot(bin,
                      hist,
                      stepMode=True,
                      fillLevel=0,
                      brush=(0, 0, 255, 150),
                      clear=True)
        self.dock.addWidget(self.win)
        self.roiCheckbox = QtGui.QCheckBox('Update ROI')
        self.roiCheckbox.setCheckState(True)
        self.roiCheckbox.setTristate(False)
        self.roiCheckbox.stateChanged.connect(self.updateRoiStatus)
        # Layout
        self.winL = pg.LayoutWidget()
        self.winL.addWidget(self.roiCheckbox, row=0, col=0)
        self.dock.addWidget(self.winL)

        #############################
        # Local variables
        #############################
        self.updateRoiStatus = True

        self.roiCurrent = None
        # Custom ROI for selecting an image region
        self.roi = pg.ROI(pos=[0, -250],
                          size=[200, 200],
                          snapSize=1.0,
                          scaleSnap=True,
                          translateSnap=True,
                          pen={
                              'color': 'g',
                              'width': 4,
                              'style': QtCore.Qt.DashLine
                          })
        self.roi.addScaleHandle([1, 0.5], [0.5, 0.5])
        self.roi.addScaleHandle([0.5, 0], [0.5, 0.5])
        self.roi.addScaleHandle([0.5, 1], [0.5, 0.5])
        self.roi.addScaleHandle([0, 0.5], [0.5, 0.5])
        self.roi.addScaleHandle(
            [0, 0],
            [1, 1
             ])  # bottom,left handles scaling both vertically and horizontally
        self.roi.addScaleHandle(
            [1, 1],
            [0, 0
             ])  # top,right handles scaling both vertically and horizontally
        self.roi.addScaleHandle([1, 0], [
            0, 1
        ])  # bottom,right handles scaling both vertically and horizontally
        self.roi.addScaleHandle([0, 1], [1, 0])
        self.roi.name = 'rect'
        self.parent.img.win.getView().addItem(self.roi)
        self.roiPoly = pg.PolyLineROI([[300, -250], [300, -50], [500, -50],
                                       [500, -150], [375, -150], [375, -250]],
                                      closed=True,
                                      snapSize=1.0,
                                      scaleSnap=True,
                                      translateSnap=True,
                                      pen={
                                          'color': 'g',
                                          'width': 4,
                                          'style': QtCore.Qt.DashLine
                                      })
        self.roiPoly.name = 'poly'
        self.parent.img.win.getView().addItem(self.roiPoly)
        self.roiCircle = pg.CircleROI([600, -250],
                                      size=[200, 200],
                                      snapSize=0.1,
                                      scaleSnap=False,
                                      translateSnap=False,
                                      pen={
                                          'color': 'g',
                                          'width': 4,
                                          'style': QtCore.Qt.DashLine
                                      })
        self.roiCircle.addScaleHandle([0.1415, 0.707 * 1.2], [0.5, 0.5])
        self.roiCircle.addScaleHandle([0.707 * 1.2, 0.1415], [0.5, 0.5])
        self.roiCircle.addScaleHandle([0.1415, 0.1415], [0.5, 0.5])
        #self.roiCircle.addScaleHandle([0, 0.5], [0.5, 0.5]) # west: pyqtgraph error
        self.roiCircle.addScaleHandle([0.5, 0.0], [0.5, 0.5])  # south
        self.roiCircle.addScaleHandle([0.5, 1.0], [0.5, 0.5])  # north
        #self.roiCircle.addScaleHandle([1.0, 0.5], [0.5, 0.5]) # east: pyqtgraph error
        self.roiCircle.name = 'circ'
        self.parent.img.win.getView().addItem(self.roiCircle)

        self.rois = []
        self.rois.append(self.roi)
        self.rois.append(self.roiPoly)
        self.rois.append(self.roiCircle)
        for roi in self.rois:
            roi.sigRegionChangeFinished.connect(self.updateRoi)
Example #60
0
 def infer_distribution(self):
     frequency_counts, _ = histogram(self.data_dropna,
                                     bins=self.distribution_bins)
     self.distribution_probabilities = normalize_given_distribution(
         frequency_counts)