def get5num(self):
    """Return the five-number summary (min, Q1, median, Q3, max) of each row in self.data."""
    mins = [min(row) for row in self.data]
    p25s = [pylab.percentile(row, 25) for row in self.data]
    medians = [pylab.percentile(row, 50) for row in self.data]
    p75s = [pylab.percentile(row, 75) for row in self.data]
    maxs = [max(row) for row in self.data]
    return list(zip(mins, p25s, medians, p75s, maxs))
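A minimal standalone sketch of the same five-number summary, using numpy.percentile directly on illustrative data (the rows of self.data are assumed to be plain sequences of numbers):

import numpy as np

rows = [np.random.normal(size=100) for _ in range(3)]   # example data, three rows
summaries = [
    (row.min(),
     np.percentile(row, 25),
     np.percentile(row, 50),
     np.percentile(row, 75),
     row.max())
    for row in rows
]
for s in summaries:
    print(["%.3f" % v for v in s])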
def randomIntensity(im):
    '''
    Rescale the intensity of the image to a random interval of the image's intensity distribution.
    '''
    return rescale_intensity(
        im,
        in_range=tuple(
            pl.percentile(im, (randRange(0, 10), randRange(90, 100)))),
        out_range=tuple(
            pl.percentile(im, (randRange(0, 10), randRange(90, 100)))))
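A hedged usage sketch: rescale_intensity is assumed to come from scikit-image, pl to be pylab, and randRange to be a simple uniform-random helper (it is not defined in the snippet, so an illustrative version is given here):

import numpy as np
import pylab as pl
from skimage.exposure import rescale_intensity

def randRange(a, b):
    # assumed helper: uniform random float in [a, b)
    return np.random.uniform(a, b)

im = np.random.rand(64, 64)    # example image with values in [0, 1]
aug = randomIntensity(im)      # intensities remapped to a random percentile window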
Example #3
def plot_fits(func, axis, xvar, popt, pcov, color='grey'):
    ''' Plot func at the fitted parameters popt, with an approximate 3-sigma band sampled from the covariance pcov '''
    axis.plot(xvar, func(xvar, *popt), color=color, lw=2, alpha=0.8)
    # draw parameter vectors from the fit covariance and evaluate the model for each draw
    psample = np.random.multivariate_normal(popt, pcov, 50000)
    ysample = np.asarray([func(xvar, *p) for p in psample])
    # the 0.3rd and 99.7th percentiles of the sampled curves give an approximate 3-sigma band
    lower = percentile(ysample, 0.3, axis=0)
    upper = percentile(ysample, 99.7, axis=0)
    axis.plot(xvar, lower, color=color, alpha=0.4, lw=1.0)
    axis.plot(xvar, upper, color=color, alpha=0.4, lw=1.0)
    axis.fill_between(xvar, lower, upper, color=color, alpha=0.2)
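A hedged end-to-end sketch of how plot_fits might be called, assuming scipy's curve_fit and a simple linear model (the model function and data here are illustrative):

import numpy as np
import matplotlib.pyplot as plt
from pylab import percentile
from scipy.optimize import curve_fit

def line(x, a, b):
    return a * x + b

x = np.linspace(0, 10, 50)
y = 2.0 * x + 1.0 + np.random.normal(scale=0.5, size=x.size)
popt, pcov = curve_fit(line, x, y)

fig, ax = plt.subplots()
ax.plot(x, y, '.', color='k')
plot_fits(line, ax, x, popt, pcov, color='steelblue')
plt.show()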
 def save_sonogram(
     self,
     replace=False,
     n_fft=settings.N_FFT,
     min_freq=settings.MIN_FREQ,
     max_freq=settings.MAX_FREQ,
     dpi=100,
     width=1000,
     height=350,
     max_framerate=settings.MAX_FRAMERATE,
 ):
     filename = self.get_sonogram_name()
     name = os.path.join(settings.SONOGRAM_DIR, filename)
     path = os.path.join(settings.MEDIA_ROOT, name)
     try:
         if not os.path.exists(path):
             replace = True
     except (ValueError, SuspiciousOperation, AttributeError):
         replace = True
     if replace:
         audio, framerate = self.get_audio(max_framerate=max_framerate)
         Pxx, freqs, bins, im = specgram(audio, NFFT=n_fft, Fs=framerate)
         f = where(logical_and(freqs > min_freq, freqs <= max_freq))[0]
          # clip extreme spectrogram values to the 0.01st-99.99th percentile range
          hi = percentile(Pxx[f].flatten(), 99.99)
          lo = percentile(Pxx[f].flatten(), 0.01)
          Pxx[where(Pxx > hi)] = hi
          Pxx[where(Pxx < lo)] = lo
         clf()
         fig = figure(figsize=(float(width) / dpi, float(height) / dpi), dpi=dpi)
         imshow(
             flipud(10 * log10(Pxx[f,])),
             extent=(bins[0], bins[-1], freqs[f][0], freqs[f][-1]),
             aspect="auto",
             cmap=cm.gray,
         )
         gca().set_ylabel("Frequency (Hz)")
         gca().set_xlabel("Time (s)")
         axis_pixels = gca().transData.transform(np.array((gca().get_xlim(), gca().get_ylim())).T)
         st, created = SonogramTransform.objects.get_or_create(
             n_fft=n_fft,
             framerate=framerate,
             min_freq=min_freq,
             max_freq=max_freq,
             duration=self.duration,
             width=width,
             height=height,
             dpi=dpi,
             top_px=max(axis_pixels[:, 1]),
             bottom_px=min(axis_pixels[:, 1]),
             left_px=min(axis_pixels[:, 0]),
             right_px=max(axis_pixels[:, 0]),
         )
          savefig(path, format="jpg", dpi=dpi)
         sonogram, created = Sonogram.objects.get_or_create(snippet=self, transform=st, path=name)
         close()
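The percentile clipping used above can be written compactly for any array; a minimal standalone sketch (independent of the Django model, the data here is a stand-in):

import numpy as np

Pxx = np.abs(np.random.randn(256, 100)) ** 2   # stand-in for spectrogram power values
lo, hi = np.percentile(Pxx, (0.01, 99.99))
Pxx_clipped = np.clip(Pxx, lo, hi)             # suppress extreme outliers
db = 10 * np.log10(Pxx_clipped)                # decibel scale for display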
def create_uncertainty(model, rate_type):
    '''Fill in uncertainty for any data that lack it, derived from the rest of the data set
    (missing effective sample sizes fall back to the 10th percentile of the observed ones).
    Parameters
    ----------
    model : data.ModelData
      dismod model
    rate_type : str
      a rate model:
      'neg_binom', 'binom', 'normal', 'log_normal', 'poisson', 'beta'
    Returns
    -------
    model : data.ModelData
      dismod model with measurements of uncertainty for all data
    '''
    # fill any missing covariate data with 0s
    for cv in list(model.input_data.filter(like='x_').columns):
        model.input_data[cv] = model.input_data[cv].fillna(0.)
    
    # find indices that are negative for standard error and
    # calculate standard error from effective sample size 
    missing_se = pl.isnan(model.input_data['standard_error']) | (model.input_data['standard_error'] < 0)
    if missing_se.any():
        # first try the reported 95% confidence interval: se = (upper - lower) / (2 * 1.96)
        model.input_data['standard_error'][missing_se] = (model.input_data['upper_ci'][missing_se] - model.input_data['lower_ci'][missing_se]) / (2*1.96)
        missing_se_still = pl.isnan(model.input_data['standard_error']) | (model.input_data['standard_error'] < 0)
        if missing_se_still.any():
            # otherwise fall back to the binomial approximation: se = sqrt(p*(1-p)/n)
            model.input_data['standard_error'][missing_se_still] = pl.sqrt(
                model.input_data['value'][missing_se_still]*(1-model.input_data['value'][missing_se_still])
                / model.input_data['effective_sample_size'][missing_se_still])

    # find indices that contain nan for effective sample size 
    missing_ess = pl.isnan(model.input_data['effective_sample_size'])
    # calculate effective sample size from standard error: n = p*(1-p)/se**2
    model.input_data['effective_sample_size'][missing_ess] = (
        model.input_data['value'][missing_ess]*(1-model.input_data['value'][missing_ess])
        / (model.input_data['standard_error'][missing_ess])**2)
    
    # find effective sample size of entire dataset
    non_missing_ess_still = pl.isnan(model.input_data['effective_sample_size'])==0 # finds all real numbers
    if False in non_missing_ess_still: 
        percent = pl.percentile(model.input_data['effective_sample_size'][non_missing_ess_still], 10.)
        missing_ess_still = pl.isnan(model.input_data['effective_sample_size'])==1 # finds all nan 
        # replace nan effective sample size with 10th percentile 
        model.input_data['effective_sample_size'][missing_ess_still] = percent
    
    # change values of 0 in lognormal model to 1 observation
    if rate_type == 'log_normal':
        # find indices where values are 0
        zero_val = (model.input_data['value'] == 0)
        # add 1 observation so no values are zero, also change effective sample size
        model.input_data['effective_sample_size'][zero_val] = model.input_data['effective_sample_size'][zero_val] + 1
        model.input_data['value'][zero_val] = 1.0/model.input_data['effective_sample_size'][zero_val]
        # update standard error
        model.input_data['standard_error'][zero_val] = pl.sqrt(model.input_data['value'][zero_val]*(1-model.input_data['value'][zero_val])/model.input_data['effective_sample_size'][zero_val])    
    
    return model
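A rough numeric illustration of the two fallback rules above (standard error from a reported 95% CI, then effective sample size from the binomial relation), with made-up toy numbers:

value, lower_ci, upper_ci = 0.10, 0.08, 0.12

# standard error from the 95% confidence interval half-width
se = (upper_ci - lower_ci) / (2 * 1.96)     # ~0.0102

# effective sample size from the binomial relation n = p*(1-p)/se**2
ess = value * (1 - value) / se**2           # ~864 observations
print(se, ess)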
    def add_weights(self, dict_, map_components):
        """
        """
        is_sidewalk = pl.array(dict_['type']) == 'sidewalk'
        is_street = pl.array(dict_['type']) == 'street'

        # relative tree density
        dict_['tree_density'] = pl.array(dict_['tree_number']).astype(float)
        dict_['tree_density'] /= pl.array(dict_['distance'])

        # sidewalks set to maximum
        # max_density = max(dict_['tree_density'])
        # dict_['tree_density'][is_sidewalk] = max_density

        # dict_['tree_density'] /= max(dict_['tree_density'])
        dict_['tree_density'] /= pl.percentile(
            dict_['tree_density'][is_street], 85.)
        pl.hist(dict_['tree_density'], bins=pl.arange(0., 1.5, 0.05))

        # randomize weight of sidewalks
        dict_['tree_density'][is_sidewalk] = 0.75 + 0.15 * pl.randn(
            len(dict_['tree_density'][is_sidewalk]))
        dict_['tree_density'][dict_['tree_density'] > 1.] = 1.
        # randomize streets with 0 trees
        zero_tree = pl.array(dict_['tree_number']) == 0
        dict_['tree_density'][zero_tree] = 0.25 + 0.15 * pl.randn(
            sum(zero_tree))

        # tree density defined as 1 in parks since no tree data there
        dict_['tree_density'][dict_['tree_density'] > 0.999] = 0.999
        dict_['tree_density'][dict_['tree_density'] < 0.] = 0.
        pl.hist(dict_['tree_density'], bins=pl.arange(0., 1.5, 0.05))

        dict_['min_dist_to_park'] = []
        for i in range(len(dict_['vertex_start'])):
            # mean location of the segment
            lon = dict_['geometry'][i].representative_point().x
            lat = dict_['geometry'][i].representative_point().y

            # distance park - segment
            dist = angles.ang_dist(
                lon, lat,
                angles.rad_to_deg(map_components['park']['rep_x_rad']),
                angles.rad_to_deg(map_components['park']['rep_y_rad']))
            dist = min(dist)
            dict_['min_dist_to_park'].append(dist)
        return dict_
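The normalization used above (divide by a high percentile instead of the maximum, then clip) can be isolated; a small sketch with made-up numbers:

import numpy as np

density = np.random.exponential(scale=2.0, size=1000)   # stand-in for trees per unit distance

# divide by the 85th percentile so a handful of extreme segments don't set the scale
density /= np.percentile(density, 85.)
density = np.clip(density, 0., 0.999)                    # keep weights in [0, 0.999]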
Example #8
    def get_binned_data_2d(self, n_bins=10):

        # generating an instance of the class BinnedData2D
        bd_in = BinnedData2D()

        # setting the attribute source of the output equal to the source of the dataset
        bd_in.source = self.source

        # setting the filter of the output equal to the filter of the dataset
        bd_in.filter = self.filter

        # setting the number of bins of the output equal to the number of bins the method receives as input
        bd_in.n_bins = n_bins

        # Extracting pi and imp
        database_reduced = self.db_fil.loc[:,['pi','imp']]

        # Generating the bin extremes
        bin_end_imp_pi = pl.percentile(database_reduced.pi,list(100.*pl.arange(bd_in.n_bins+1.)/(bd_in.n_bins)))

        # Adjusting the last bin extreme
        bin_end_imp_pi[-1] = bin_end_imp_pi[-1] + 0.00001

        # Assigning each point to a bin
        database_reduced['fac_pi'] = pl.digitize(database_reduced.pi,bin_end_imp_pi)

        # Using a groupby in order to generate average pi and imp for each bin, assigning the output to df_imp
        df_gp = database_reduced[['pi','imp','fac_pi']].groupby('fac_pi')
        df_imp = pd.concat([df_gp.mean(),df_gp.imp.std(),df_gp.imp.count()], axis=1)
        df_imp.columns = ['pi','imp','stdd','nn']

        # Setting the data of the output equal to the result of the binning procedure
        bd_in.data = df_imp

        # returning the filled instance of the class BinnedData2D
        return bd_in
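A standalone sketch of the same equal-population binning idea (percentile bin edges, digitize, groupby), with illustrative column names:

import numpy as np
import pandas as pd

df = pd.DataFrame({'pi': np.random.lognormal(size=10000),
                   'imp': np.random.normal(size=10000)})
n_bins = 10

# bin edges at evenly spaced percentiles, so every bin holds roughly the same number of points
edges = np.percentile(df.pi, 100. * np.arange(n_bins + 1.) / n_bins)
edges[-1] += 1e-5                        # make the last edge inclusive
df['fac_pi'] = np.digitize(df.pi, edges)

grouped = df.groupby('fac_pi')
binned = pd.concat([grouped.mean(), grouped.imp.std(), grouped.imp.count()], axis=1)
binned.columns = ['pi', 'imp', 'stdd', 'nn']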
Example #9
        print(dictionary[id], freq)

#step 3 create similarity matrix
index = similarities.Similarity('/tmp/tst',
                                corpus_tfidf.corpus,
                                num_features=corpus.num_terms + 1)
sims = index[corpus_tfidf]
#step 3.1 zero out similarities below a corpus-specific percentile threshold
percentile = {
    'sicp': 90,
    'sicm': 95,
    'dirac': 60,
    'dirac_sections': 95,
    'som': 98
}[prefix]
sims[sims < pylab.percentile(sims, percentile)] = 0

#step 4 convert datatype to networkx Graph
print("converting similarity matrix to networkx Graph")
sims = networkx.Graph(sims, node_list=list(range(len(book))))
networkx.set_node_attributes(sims, 'name',
                             {x: y
                              for x, y in enumerate(labels)})
networkx.set_node_attributes(sims, 'group',
                             {x: y
                              for x, y in enumerate(groups)})
wordcount_normalize = {'sicp': 1000, 'sicm': 500}.get(prefix, 1000)
networkx.set_node_attributes(
    sims, 'wordcount',
    {x: float(y) / wordcount_normalize
     for x, y in enumerate(wordcount)})
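The percentile-thresholding step above can be tried in isolation on a small random matrix; a minimal sketch with numpy only:

import numpy as np

sims = np.random.rand(10, 10)        # stand-in for a document-similarity matrix
cutoff = np.percentile(sims, 90)     # keep only the strongest ~10% of links
sims[sims < cutoff] = 0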
for t in range(0, num_timesteps):

    if t == 0:  # First year starting with initial assumptions
        for k in range(num_simulations):
            u[k] = triangular(marketsize_min, marketsize_mode, marketsize_max)
            # triangular distribution of current number of potential users
            s[k] = calc_marketshare(u[k], marketshare_init)
            # market share for product
            r[k] = calc_revenue(u[k], s[k])  # revenue
        # store values in first row of matrices:
        rev[t, :] += r
        usr[t, :] += u
        sha[t, :] = s
        #percentiles of the complete revenue row at time t
        percentiles_rev[t, :] = percentile(rev[t, :], perc_selection)
        percentiles_usr[t, :] = percentile(usr[t, :], perc_selection)
        percentiles_sha[t, :] = percentile(sha[t, :], perc_selection)

    else:  # Following years starting with the previous year's data
        for k in range(num_simulations):
            # estimate how much the market has grown:
            loc = triangular(1, 2, 4)
            scale = triangular(1, 2, 3)
            factor = 3
            marketgrowth = logist(t, loc, scale, factor)
            u[k] += u[k] * marketgrowth  # apply market growth
            s[k] = calc_marketshare(u[k], s[k]) + logist(t, 4, 5, 1)
            # apply market share increase
            r[k] = calc_revenue(u[k], s[k])  # calculate revenue
        # store values in following rows of matrices
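The per-year bookkeeping above reduces each row of the simulation matrices to a few percentile summaries; a self-contained sketch under assumed names and distributions:

import numpy as np
from pylab import percentile

num_simulations = 10000
perc_selection = [5, 50, 95]    # lower bound, median, upper bound

rev_year = np.random.lognormal(mean=10, sigma=0.3, size=num_simulations)   # one year of simulated revenue
print(percentile(rev_year, perc_selection))   # roughly [13e3, 22e3, 36e3]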
Example #11
if plotting_tmp:
	plot_ge('ge')


###########################
## Generating histogram of the daily rate Q/V_D
##
## Generating evenly populated bins by means of pl.percentile 
## Producing a histogram 
## Plotting
###########################


print('Generating histogram of the daily rate Q/V_D...')
n_bins_h_pi = 500
bin_end_h_pi = pl.percentile(df_in.pi,list(100.*pl.arange(n_bins_h_pi+1.)/(n_bins_h_pi)))
pdf_pi, bins_pi, patches = pl.hist(pl.array(df_in.pi), bin_end_h_pi, normed=1, histtype='step')
bins_pi_cent = (bins_pi[:-1] - bins_pi[1:])/2. + bins_pi[1:]	# finding the center of the bins

if plotting_tmp:
	plot_hist('stat_pi')


###########################
## Measuring temporary impact as a function of the daily rate Q/V_D
##
## Generating evenly populated bins of \pi by means of percentile
## Assigning to each metaorder the corresponding bin in df_in.fac_pi
## Evaluating the average daily rate and impact for each bin, standard deviation and counting by means of a groupby
## Fitting a power-law and a logarithmic function
## Plotting
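The code that this header describes is not included in the snippet; a hedged sketch of what it might look like, reusing df_in from the earlier steps and the same percentile-binning idea (it assumes df_in also carries an 'imp' column, as in the binning method above; the number of bins and starting values are illustrative):

import numpy as np
from scipy.optimize import curve_fit

n_bins_imp = 50
edges = np.percentile(df_in.pi, 100. * np.arange(n_bins_imp + 1.) / n_bins_imp)
edges[-1] += 1e-5
df_in['fac_pi'] = np.digitize(df_in.pi, edges)

grouped = df_in.groupby('fac_pi')
pi_mean, imp_mean = grouped.pi.mean(), grouped.imp.mean()

# power-law candidate: imp ~ a * pi**b (a logarithmic form could be fitted the same way)
popt, pcov = curve_fit(lambda x, a, b: a * x**b, pi_mean, imp_mean, p0=(1., 0.5))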
# example model0, to test vars and test-train
model = mu.load_new_model(model_num, test_area, data_type)
nan_ix = list(model.input_data['effective_sample_size'][pl.isnan(model.input_data['effective_sample_size'])==1].index) # list of nan in effective sample size
model = mu.create_uncertainty(model, 'binom')
for cv in list(model.input_data.filter(like='x_').columns): # fill missing with 0
    model.input_data[cv] = model.input_data[cv].fillna(0.)

# example model1, to test test-train
model1 = mu.load_new_model(model_num, test_area, data_type)
model1 = mu.create_uncertainty(model1, 'normal')

# example model2, to test loading and uncertainty
model2 = mu.load_new_model(model_num, test_area, data_type)
non_nan_ix2 = list(model2.input_data['effective_sample_size'][pl.isnan(model2.input_data['effective_sample_size'])==0].index) # list of non-nan in effective sample size
ten_percent = pl.percentile(model2.input_data.loc[non_nan_ix2, 'effective_sample_size'], 10.)
model2 = mu.create_uncertainty(model2, 'normal')

# find official areas of western europe
areas = [test_area]
for i in model.hierarchy.edges(test_area):
    areas.append(i[1])

# create data for math functions
pred = pandas.DataFrame(pl.arange(10), columns=['mean'])
pred_ui = pandas.DataFrame(pl.hstack((pred-1, pred+1)), columns=['lower','upper'])
obs = pandas.DataFrame(pl.arange(10)+1, columns=['value'])


def test_load_area():
    # find model unique areas
Example #13
def filter1d(x, mask_only=True, algos=['2sigma']):
    """
    Filter vector with selected algorithms.
    In:
        x : ndarray, input vector
        mask_only : bool, do not touch input vector, just find "bad" values
        algos : list of str, algos list to apply to input vector. The sequence is the same as in the list
    Out:        
        xnew : ndarray, filtered input vector (returned only if mask_only=False)
        mask : ndarray of bool, vector of the same length as x, where "0" represents masked values
    """
    xnew = pl.array(x, dtype='float')
    mask = pl.ones(len(x), dtype='bool')
    for algo in algos:
        if algo == 'diff02':
            for i in range(0, len(xnew)-1):
                if (abs(xnew[i+1] / xnew[i] - 1) > .2): # current rr differs by more than 20% from the previous one
                    mask[i] = False
            if not mask_only:
                xnew = xnew * mask
                xnew = pl.ma.masked_equal(xnew,0)
                xnew = pl.ma.compressed(xnew)

        elif algo == '2sigma':
            mean = pl.mean(xnew)
            std = pl.std(xnew)
            for i in range(0, len(xnew)):
                if pl.logical_or(xnew[i] < mean - 2*std, mean + 2*std < xnew[i]):
                    mask[i] = False
            if not mask_only:
                xnew = xnew * mask
                xnew = pl.ma.masked_equal(xnew,0)
                xnew = pl.ma.compressed(xnew)

        elif algo == '5per95':
            per5 = pl.percentile(xnew,5)
            per95 = pl.percentile(xnew,95)
            #print per5,per95
            for i in range(0, len(xnew)):
                if pl.logical_or(xnew[i] < per5, per95 < xnew[i]):
                    mask[i] = False
            if not mask_only:
                xnew = xnew * mask
                xnew = pl.ma.masked_equal(xnew,0)
                xnew = pl.ma.compressed(xnew)

        elif algo == '3per97':
            per3 = pl.percentile(xnew,3)
            per97 = pl.percentile(xnew,97)
            for i in range(0, len(xnew)):
                if pl.logical_or(xnew[i] < per3, per97 < xnew[i]):
                    mask[i] = False
            if not mask_only:
                xnew = xnew * mask
                xnew = pl.ma.masked_equal(xnew,0)
                xnew = pl.ma.compressed(xnew)

        elif algo == '1per99':
            per1 = pl.percentile(xnew,1)
            per99 = pl.percentile(xnew,99)
            for i in range(0, len(xnew)):
                if pl.logical_or(xnew[i] <= per1, per99 <= xnew[i]):
                    mask[i] = False
            if not mask_only:
                xnew = xnew * mask
                xnew = pl.ma.masked_equal(xnew,0)
                xnew = pl.ma.compressed(xnew)

        elif algo == 'ho_moody':
            for i in range(2, len(xnew)-2):
                mean = pl.mean([xnew[i-2],xnew[i-1],xnew[i+1],xnew[i+2]])
                if pl.logical_or(xnew[i] < .8 * mean, 1.2 * mean < xnew[i]):
                    mask[i] = False
            if not mask_only:
                xnew = xnew * mask
                xnew = pl.ma.masked_equal(xnew,0)
                xnew = pl.ma.compressed(xnew)

        else:
           warn("Donno anything about such filtration algorithm")

    print "NB: Deleted %0.3f%% of array" % (float(len(x)-pl.sum(mask))/len(x)*100,)
    if mask_only:
        return mask
    else:
        return xnew, mask
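A hedged usage sketch on synthetic data with a few injected outliers (pl is assumed to be pylab, as in the function body):

import pylab as pl

x = pl.randn(200) + 10.                      # baseline signal
x[[20, 75, 150]] += 25.                      # inject a few obvious outliers

mask = filter1d(x, mask_only=True, algos=['2sigma', '5per95'])   # just flag the bad samples
x_clean, m = filter1d(x, mask_only=False, algos=['2sigma'])      # or drop them outright
print(len(x), int(mask.sum()), len(x_clean))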