def resample(data):
    """Replace the observed values with noisy draws around the model prediction.

    Missing uncertainty is filled in first, then ``data['value']`` is
    overwritten with a negative-binomial draw whose mean is
    ``effective_sample_size * (mu_pred + 1e-6)``, divided by the effective
    sample size.

    data : table-like object indexed by column name
      must provide 'mu_pred', 'standard_error', 'upper_ci', 'lower_ci',
      'effective_sample_size' and 'value'; it is modified in place and a
      'true' column holding the shifted prediction is added

    Returns the same ``data`` object (returned untouched when it is empty).
    """
    if len(data) == 0:
        return data

    delta_true = .1
    # small offset keeps the negative-binomial mean strictly positive
    p = data['mu_pred']+1.e-6

    # TODO: factor this uncertainty clean-up into a shared helper
    ## ensure that all data has uncertainty quantified appropriately
    # first replace all missing se from ci
    missing_se = pl.isnan(data['standard_error']) | (data['standard_error'] <= 0)
    data['standard_error'][missing_se] = (data['upper_ci'][missing_se] - data['lower_ci'][missing_se]) / (2*1.96)

    # then replace all missing ess with se
    missing_ess = pl.isnan(data['effective_sample_size'])
    data['effective_sample_size'][missing_ess] = data['value'][missing_ess]*(1-data['value'][missing_ess])/data['standard_error'][missing_ess]**2

    # warn about data whose effective sample size is still missing or is
    # non-positive, and fall back to a sample size of 1.  Zero must be caught
    # here as well as negatives: n is used as a divisor below.
    missing_ess = pl.isnan(data['effective_sample_size']) | (data['effective_sample_size'] <= 0)
    if sum(missing_ess) > 0:
        print('WARNING: %d rows of data has invalid quantification of uncertainty.' % sum(missing_ess))
        data['effective_sample_size'][missing_ess] = 1.0

    n = data['effective_sample_size']

    data['true'] = p
    data['value'] = (1.0 * mc.rnegative_binomial(n*p, delta_true*n*p)) / n

    # uncomment below to test the effect of having very wrong data
    #data['value'] = 0.
    #data['effective_sample_size'] = 1.e6

    return data
    def process(self):
        """Rearrange the ping data into a matrix of maximum amplitudes.

        The result is stored in ``self.pingmax``, whose dimensions correspond
        to the power settings, the gain settings and the beam sections
        (the third axis of ``self.pingdata``).
        """
        MINSAMPLES = 5
        datadim = self.pingdata.shape
        self.pingmax = pl.zeros((len(self.settings['power']), len(self.settings['gain']), datadim[2]))

        # For every (power, gain, k) combination, find the samples whose
        # pingdata[:, 1, k] equals the power and pingdata[:, 2, k] the gain.
        for i, power in enumerate(self.settings['power']):
            for j, gain in enumerate(self.settings['gain']):
                for k in xrange(datadim[2]):
                    sampleindx = pl.find((self.pingdata[:, 1, k]  == power) & (self.pingdata[:, 2, k] == gain))
                    if len(sampleindx)  >  MINSAMPLES:
                        # maximum of pingdata[:, 0, k] over the last MINSAMPLES matching samples
                        temp = self.pingdata[sampleindx[-MINSAMPLES:], 0, k]
                        tempmax = temp.max()
                        # NOTE(review): the assignments below overwrite one
                        # another, so as written this entry always ends up NaN.
                        # It looks like `else:` lines were lost here -- confirm
                        # against the upstream file.
                        if tempmax == 0:
                            self.pingmax[i, j, k] = pl.NaN
                            self.pingmax[i, j, k] = temp.max()
                        self.pingmax[i, j, k] = pl.NaN

        # The following section removes settings that were collected erroneously.
        # NOTE(review): the original comment here said "gain settings first",
        # but this first pass iterates over the power settings.
        null = pl.zeros((len(self.settings['gain']), datadim[2]))
        powershortlist = []
        self.havedata = True  # this is an ugly workaround...
        for i, power in enumerate(self.settings['power']):
            # a power setting whose whole pingmax slice is NaN is reported for removal
            test = pl.isnan(self.pingmax[i, :, :] )
            if test.all():
                print 'removing ' + str(power) + ' power setting.'
        # NOTE(review): nothing visible ever appends to powershortlist, and the
        # `except` below has no matching `try`; lines appear to be missing.
        for i in powershortlist:
            except IndexError:
                self.havedata = False
        if self.havedata:
            self.pingmax = pl.delete(self.pingmax, powershortlist, 0)
            # NOTE(review): the original comment here said "then power
            # settings", but this pass iterates over the gain settings.
            null = pl.zeros((len(self.settings['power']), datadim[2]))
            gainshortlist = []
            for i, gain in enumerate(self.settings['gain']):
                test = pl.isnan(self.pingmax[:, i, :])
                if test.all():
                    print 'removing ' + str(gain) + ' gain setting.'
            # NOTE(review): same problem as above -- gainshortlist is never
            # filled and the `except` has no `try`.
            for i in gainshortlist:
                except IndexError:
                    self.havedata = False
            if self.havedata:
                self.pingmax = pl.delete(self.pingmax, gainshortlist, 1)
                # convert to 20*log10 and subtract the power and gain to normalize
                self.pingmax = 20*pl.log10(self.pingmax)
                for i, power in enumerate(self.settings['power']):
                    for j, gain in enumerate(self.settings['gain']):
                        self.pingmax[i, j, :] = self.pingmax[i, j, :] - power - gain
def evaluate_model(mod, comment='', data_fname='missing_noisy_data.csv', truth_fname='data.csv'):
    # Fits the model named `mod` to the data file, compares the predictions
    # with the truth file, prints a summary and returns the fitted model.
    # NOTE(review): because of the trailing `% data_run_models`, the string
    # below is an ordinary expression, not a docstring.
    """ Run specified model on existing data (data.csv / missing_noisy_data.csv) and save results in dev_log.csv
    Existing models: %s """ % data_run_models
    # Python 2 raise syntax; only names listed in data_run_models get past this check
    if mod not in data_run_models.split(' '):
        raise TypeError, 'Unrecognized model "%s"; must be one of %s' % (mod, data_run_models)

    import model

    print 'loading data'
    data = pl.csv2rec(data_fname)
    truth = pl.csv2rec(truth_fname)
    t0 = time.time()
    print 'generating model'
    # NOTE(review): eval on a string built from `mod`; the membership check
    # above is the only thing restricting what gets evaluated here.
    mod_mc = eval('model.%s(data)' % mod)

    print 'fitting model with mcmc'
    # presumably (iterations, burn-in, thinning) -- confirm against mod_mc's sampler
    mod_mc.sample(10000, 5000, 50, verbose=1)
    t1 = time.time()

    print 'summarizing results'

    import graphics
    pl.figure(figsize=(22, 17), dpi=300)
    graphics.plot_all_predictions_over_time(data, mod_mc.predicted, more_data=truth)

    # out-of-sample rows are those whose y is NaN in the fitted data;
    # absolute and relative (percent) RMSE are computed against the truth file
    data_stats = mod_mc.data_predicted.stats()
    i_out = [i for i in range(len(data)) if pl.isnan(data.y[i])]
    rmse_abs_out = pl.rms_flat(truth.y[i_out] - data_stats['mean'][i_out])
    rmse_rel_out = 100*pl.rms_flat(1. - data_stats['mean'][i_out]/truth.y[i_out])

    # in-sample rows are those whose y is present
    i_in = [i for i in range(len(data)) if not pl.isnan(data.y[i])]
    rmse_abs_in = pl.rms_flat(truth.y[i_in] - data_stats['mean'][i_in])
    rmse_rel_in = 100*pl.rms_flat(1. - data_stats['mean'][i_in]/truth.y[i_in])

    # coverage: percent of out-of-sample truth values inside the 95% HPD interval
    param_stats = mod_mc.param_predicted.stats()
    coverage = 100*pl.sum((truth.y[i_out] >= param_stats['95% HPD interval'][i_out, 0]) & (truth.y[i_out] <= param_stats['95% HPD interval'][i_out, 1])) / float(len(i_out))

    # `md5` is the Python 2 module; the hash identifies the data set used for this run
    import md5
    data_hash = md5.md5(data).hexdigest()
    # NOTE(review): the second line of this list is malformed
    # (`len(pl.unique(, len(...`); an argument appears to have been lost.
    # The format string below expects a "countries" count in that position.
    results = [mod, t1-t0, rmse_abs_out, rmse_rel_out, rmse_abs_in, rmse_rel_in, coverage,
               len(data), len(pl.unique(data.region)), len(pl.unique(, len(pl.unique(data.year)), len(pl.unique(data.age)), data_hash,
               t0, comment]
    print '%s: time: %.0fs out-of-samp rmse abs=%.1f rel=%.0f in-samp rmse abs=%.1f rel=%.0f coverage=%.0f\ndata: %d rows; %d regions, %d countries %d years %d ages [data hash: %s]\n(run conducted at %f)\n%s' % tuple(results)

    pl.savefig('/home/j/Project/Models/space-time-smoothing/images/%s.png' % t0)  # FIXME: don't hardcode path for saving images

    # NOTE(review): the log file is opened and a csv writer is created, but no
    # row is written and the file is not closed in the code visible here.
    import csv
    f = open('dev_log.csv', 'a')
    f_csv = csv.writer(f)

    return mod_mc
def create_uncertainty(model, rate_type):
    '''data without valid uncertainty is given the 10% uncertainty of the data set

    Every row of ``model.input_data`` ends up with a standard error and an
    effective sample size where they can be derived:

    1. missing covariate values (columns whose name contains 'x_') become 0
    2. a missing or negative standard error is taken from the confidence
       interval, and failing that from the effective sample size
    3. a missing effective sample size is taken from the standard error, and
       failing that set to the 10th percentile of the known sample sizes
    4. for ``rate_type == 'log_normal'`` only, rows with value 0 get one extra
       observation so that no value is exactly zero

    Parameters
    ----------
    model : data.ModelData
      dismod model; ``model.input_data`` is modified in place
    rate_type : str
      a rate model; only 'log_normal' changes the behaviour of this function

    Results
    -------
    model : data.ModelData
      the same dismod model, with measurements of uncertainty for all data
    '''
    df = model.input_data

    # fill any missing covariate data with 0s
    for cv in list(df.filter(like='x_').columns):
        df[cv] = df[cv].fillna(0)

    # find rows whose standard error is missing or negative and
    # calculate it from the confidence interval
    missing_se = df['standard_error'].isnull() | (df['standard_error'] < 0)
    if missing_se.any():
        df.loc[missing_se, 'standard_error'] = (
            df.loc[missing_se, 'upper_ci'] - df.loc[missing_se, 'lower_ci']) / (2*1.96)

        # where that did not help, calculate it from the effective sample size
        missing_se_still = df['standard_error'].isnull() | (df['standard_error'] < 0)
        if missing_se_still.any():
            value = df.loc[missing_se_still, 'value']
            ess = df.loc[missing_se_still, 'effective_sample_size']
            df.loc[missing_se_still, 'standard_error'] = (value * (1 - value) / ess) ** 0.5

    # calculate missing effective sample size from the standard error
    missing_ess = df['effective_sample_size'].isnull()
    if missing_ess.any():
        value = df.loc[missing_ess, 'value']
        se = df.loc[missing_ess, 'standard_error']
        df.loc[missing_ess, 'effective_sample_size'] = value * (1 - value) / se ** 2

    # replace whatever is still missing with the 10th percentile of the known
    # sample sizes.  Test the mask values with .any(): `False in <Series>`
    # would look at the index labels, not at the values.
    missing_ess_still = df['effective_sample_size'].isnull()
    if missing_ess_still.any():
        percent = df.loc[~missing_ess_still, 'effective_sample_size'].quantile(0.10)
        df.loc[missing_ess_still, 'effective_sample_size'] = percent

    # change values of 0 in lognormal model to 1 observation
    if rate_type == 'log_normal':
        zero_val = (df['value'] == 0)
        # add 1 observation so no values are zero, also change effective sample size
        df.loc[zero_val, 'effective_sample_size'] += 1
        ess = df.loc[zero_val, 'effective_sample_size']
        df.loc[zero_val, 'value'] = 1.0 / ess
        # update standard error to match the adjusted value
        value = df.loc[zero_val, 'value']
        df.loc[zero_val, 'standard_error'] = (value * (1 - value) / ess) ** 0.5
    return model
# Example #5
    def likelihood(self, verbose=None):
        # NOTE(review): the next two lines read like docstring text whose
        # triple-quote delimiters are missing; as written they are not valid
        # Python.
        Compute the log-likelihood of the current simulation based on the number
        of new diagnoses.
        # fall back to the object's own 'verbose' setting
        if verbose is None:
            verbose = self['verbose']

        # NOTE(review): the body of this `if` is only the tail of a call
        # (`verbose=verbose)`); the start of that call is missing.
        if not self.results['ready']:
                     verbose=verbose)  # To avoid an infinite loop

        # Sum log(p) from cv.poisson_test(datum, estimate) over the entries.
        # NOTE(review): the loop iterates over the literal list
        # ['new_positives'], so `datum` is a string here; presumably a data
        # column was meant -- confirm against the upstream source.
        loglike = 0
        for d, datum in enumerate(['new_positives']):
            if not pl.isnan(datum):  # Skip days when no tests were performed
                estimate = self.results['diagnoses'][d]
                p = cv.poisson_test(datum, estimate)
                logp = pl.log(p)
                loglike += logp
                # NOTE(review): the f-string below is a bare expression with
                # extra indentation; the call that printed it appears to be
                # missing.
                if verbose >= 2:
                        f'  {["date"][d]}, data={datum:3.0f}, model={estimate:3.0f}, log(p)={logp:10.4f}, loglike={loglike:10.4f}'

        # the total is stored on the results and also returned
        self.results['likelihood'] = loglike

        if verbose >= 1:
            print(f'Likelihood: {loglike}')

        return loglike
    def sample(self, model, evidence):
        """Draw a new g series with forward-filter backward-sampling.

        Observations come from ``evidence['z']``.  Rows with ``T == 0`` are
        masked out; rows with ``T == 2`` have ``h`` subtracted and use
        ``sigma_z_h**2`` as observation variance, all others use
        ``sigma_z_g**2``.  ``self._kalman`` is reconfigured in place as a
        one-dimensional random walk with variance ``sigma_g**2``.

        Returns the sampled series reshaped to ``(n,)``, with ``n = len(g)``.
        """
        z = evidence['z']
        T = evidence['T']
        g = evidence['g']
        h = evidence['h']
        sigma_g = evidence['sigma_g']
        sigma_z_g = model.known_params['sigma_z_g']
        sigma_z_h = model.known_params['sigma_z_h']
        prior_mu_g = model.hyper_params['prior_mu_g']
        prior_cov_g = model.hyper_params['prior_cov_g']
        n = len(g)
        column = (n, 1)

        # work on column-vector copies so the evidence arrays stay untouched
        g = g.copy().reshape(column)
        h = h.copy().reshape(column)
        z_g = ma.asarray(z.copy().reshape(column))
        obs_cov = sigma_z_g**2*ones((n,1,1))

        is_t0 = (T == 0)
        if sum(is_t0) > 0:
            z_g[is_t0] = nan
        is_t2 = (T == 2)
        if sum(is_t2) > 0:
            z_g[is_t2] -= h[is_t2]
            obs_cov[is_t2] = sigma_z_h**2
        # every nan entry becomes a masked (missing) observation
        z_g[isnan(z_g)] = ma.masked

        kalman = self._kalman
        kalman.initial_state_mean = array([prior_mu_g[0],])
        kalman.initial_state_covariance = array([prior_cov_g[0,0],])
        kalman.transition_matrices = eye(1)
        kalman.transition_covariance = array([sigma_g**2,])
        kalman.observation_matrices = eye(1)
        kalman.observation_covariance = obs_cov

        sampled_g = forward_filter_backward_sample(kalman, z_g)
        return sampled_g.reshape((n,))
    def sample(self, model, evidence):
        """Draw a new h series with forward-filter backward-sampling.

        Observations come from ``evidence['z']``.  Rows with ``T == 0`` or
        ``T == 1`` are masked out; rows with ``T == 2`` have ``g`` subtracted.
        ``self._kalman`` is reconfigured in place with transition coefficient
        ``phi``, offset ``mu_h*(1-phi)``, transition variance ``sigma_h**2``
        and observation variance ``sigma_z_h**2``.

        Returns the sampled series reshaped to ``(n,)``, with ``n = len(h)``.
        """
        z = evidence['z']
        T = evidence['T']
        g = evidence['g']
        h = evidence['h']
        sigma_h = evidence['sigma_h']
        phi = evidence['phi']
        sigma_z_h = model.known_params['sigma_z_h']
        mu_h = model.known_params['mu_h']
        prior_mu_h = model.hyper_params['prior_mu_h']
        prior_cov_h = model.hyper_params['prior_cov_h']
        n = len(h)
        column = (n, 1)

        # work on column-vector copies so the evidence arrays stay untouched
        g = g.copy().reshape(column)
        h = h.copy().reshape(column)
        z_h = ma.asarray(z.copy().reshape(column))

        for code in (0, 1):
            rows = (T == code)
            if sum(rows) > 0:
                z_h[rows] = nan
        is_t2 = (T == 2)
        if sum(is_t2) > 0:
            z_h[is_t2] -= g[is_t2]
        # every nan entry becomes a masked (missing) observation
        z_h[isnan(z_h)] = ma.masked

        kalman = self._kalman
        kalman.initial_state_mean = array([prior_mu_h[0],])
        kalman.initial_state_covariance = array([prior_cov_h[0,0],])
        kalman.transition_matrices = array([phi,])
        kalman.transition_covariance = array([sigma_h**2,])
        kalman.transition_offsets = mu_h*(1-phi)*ones((n, 1))
        kalman.observation_matrices = eye(1)
        kalman.observation_covariance = array([sigma_z_h**2,])

        sampled_h = forward_filter_backward_sample(kalman, z_h)
        return sampled_h.reshape((n,))
def add_noise_to_cube(data, beamfwhm_pix, fluxmap=None):
    """Return ``data`` plus beam-smoothed random noise of matching amplitude.

    Gaussian white noise with the shape of ``data`` is smoothed plane by
    plane along the first axis with a Gaussian of FWHM ``beamfwhm_pix``
    pixels, optionally divided by a per-pixel scale derived from ``fluxmap``,
    and rescaled so that its median absolute deviation equals that of
    ``data``.

    Parameters
    ----------
    data : 3-d array
        the cube; the first axis is assumed to be velocity
    beamfwhm_pix : float
        beam FWHM in pixels (sigma = FWHM / 2.354)
    fluxmap : array or None
        if given, each smoothed plane is divided by ``1.26 * fluxmap**2``;
        NaN entries of that scale are replaced by 1

    Returns
    -------
    array with the shape of ``data``
    """
    import numpy as np
    from scipy.ndimage import gaussian_filter

    s = data.shape
    noise = np.random.randn(s[0], s[1], s[2])

    noisescale = 1.
    if fluxmap is not None:
        noisescale = 1.26 * fluxmap**2
        # undefined scale entries fall back to "no scaling"
        noisescale = np.where(np.isnan(noisescale), 1., noisescale)

    sigma_pix = beamfwhm_pix / 2.354
    for i in range(s[0]):  # ASSUMES FIRST AXIS IS VEL
        noise[i] = gaussian_filter(noise[i], sigma_pix) / noisescale

    def mad(values, axis=None):
        # median absolute deviation, ignoring NaNs
        return np.nanmedian(np.absolute(values - np.nanmedian(values, axis)), axis)

    rms = mad(data)  # rms of original cube
    current_rms = mad(noise)
    # scale the noise to have the same rms as the data - there's a sqrt(2) problem I think
    noise = rms * noise / current_rms

    return noise + data
# Example #9
def add_thermodynamic_constraints(cpl, dG0_f, c_range=(1e-6, 1e-2), T=default_T, bounds=None):
    """
        Bound the variable 'c<i>' of every compound with a known dG0_f.

        For compound i the bounds set on ``cpl.variables`` are
            dG0_f[i] + R*T*ln(Cmin)  <=  c<i>  <=  dG0_f[i] + R*T*ln(Cmax)
        where Cmin/Cmax come from ``bounds[i]`` when given (and truthy) and
        from ``c_range`` otherwise.  Compounds whose dG0_f is NaN are skipped.

        cpl     - problem object; only cpl.variables.set_lower_bounds and
                  cpl.variables.set_upper_bounds are used here
        dG0_f   - 2-d array, one row per compound, value in column 0
        c_range - default (min, max) concentration
        T       - temperature used in the R*T factor
        bounds  - optional list of (min, max) pairs, one per compound;
                  None entries fall back to c_range

        Raises an Exception if bounds is given with the wrong length.
    """
    Nc = dG0_f.shape[0]

    # identity tests: `!= None` would compare elementwise on array-like bounds
    if bounds is not None and len(bounds) != Nc:
        raise Exception("The concentration bounds list must be the same length as the number of compounds")
    if bounds is None:
        bounds = [(None, None)] * Nc
    for c in range(Nc):
        if pylab.isnan(dG0_f[c, 0]):
            continue # unknown dG0_f - cannot bound this compound's concentration at all

        b_low = bounds[c][0] or c_range[0]
        b_high = bounds[c][1] or c_range[1]

        # lower bound: dG0_f + R*T*ln(Cmin) <= x_i
        cpl.variables.set_lower_bounds('c%d' % c, dG0_f[c, 0] + R*T*pylab.log(b_low))

        # upper bound: x_i <= dG0_f + R*T*ln(Cmax)
        cpl.variables.set_upper_bounds('c%d' % c, dG0_f[c, 0] + R*T*pylab.log(b_high))
# Example #10
def test_from_gbd_json():
    """Check that a ModelData built from the dismoditis JSON has the expected parts.

    Covers the columns of the input data and the output template, the priors
    present for every data type, one edge of the area hierarchy, and finally
    that a model can be built from a freshly generated DiseaseJson.
    """
    d = data.ModelData.from_gbd_json('tests/dismoditis.json')

    input_fields = 'data_type value area sex age_start age_end year_start year_end standard_error effective_sample_size lower_ci upper_ci age_weights'.split()
    assert len(d.input_data) > 17, 'dismoditis model has more than 17 data points'
    for name in input_fields:
        assert name in d.input_data.columns, 'Input data CSV should have field "%s"' % name
    #assert len(d.input_data.filter(regex='x_').columns) == 1, 'should have added country-level covariates to input data'
    #assert len(d.input_data['x_LDI_id_Updated_7July2011'].dropna().index) > 0

    template_fields = 'area sex year pop'.split()
    assert len(d.output_template) > 100
    for name in template_fields:
        assert name in d.output_template.columns, 'Output template CSV should have field "%s"' % name
    #assert len(d.output_template.filter(regex='x_').columns) == 1, 'should have added country-level covariates to output template'
    #assert len(d.output_template['x_LDI_id_Updated_7July2011'].dropna().index) > 0

    # every data type must carry every kind of prior
    rate_types = 'i p r f rr X'.split()
    prior_names = 'smoothness heterogeneity level_value level_bounds increasing decreasing'.split()
    for rate in rate_types:
        for name in prior_names:
            assert name in d.parameters[rate], 'Parameters for %s should include prior on %s' % (rate, name)

    assert 'CHN' in d.hierarchy.successors('asia_east')
    assert pl.isnan(d.hierarchy['asia_east']['CHN'].get('weight'))
    #assert set(d.hierarchy.node['asia_east'].keys()) == set('area sex year_start year_end pop'.split())
    #assert len(d.nodes_to_fit) == 21*3*2 + 1

    import dismod3
    import simplejson as json
    disease_json_text = dismod3.disease_json.DiseaseJson().to_json()
    model = data.ModelData.from_gbd_jsons(json.loads(disease_json_text))
# Example #11
    def likelihood(self, weights=None, verbose=None):
        # NOTE(review): the next two lines read like docstring text whose
        # triple-quote delimiters are missing; as written they are not valid
        # Python.
        Compute the log-likelihood of the current simulation based on the number
        of new diagnoses.
        # fall back to the object's own 'verbose' setting
        if verbose is None:
            verbose = self['verbose']

        # per-key weights; an empty dict means no key has an explicit weight
        if weights is None:
            weights = {}

        loglike = 0
        # NOTE(review): the left operand of `is not None` is missing here.
        if is not None:  # Only perform likelihood calculation if data are available
            for key in self.reskeys:
                # NOTE(review): the right operand of this `in` is missing.
                if key in
                    # NOTE(review): `weight = 1` directly overwrites the
                    # looked-up weight; an `else:` line appears to be missing.
                    if key in weights:
                        weight = weights[key]
                        weight = 1
                    # NOTE(review): this iterates over the one-element list
                    # [key], and the `len(` below is never closed; tokens
                    # appear to have been lost from both expressions.
                    for d, datum in enumerate([key]):
                        if not pl.isnan(datum) and d < len(
                            estimate = self.results[key][d]
                            # only score entries where datum and estimate are both truthy
                            if datum and estimate:
                                p = cvu.poisson_test(datum, estimate)
                                logp = pl.log(p)
                                loglike += weight * logp
                                # NOTE(review): the two lines below look like
                                # the arguments of a call whose opening is
                                # missing.
                                    f'  {["date"][d]}, data={datum:3.0f}, model={estimate:3.0f}, log(p)={logp:10.4f}, loglike={loglike:10.4f}',
                                    2, verbose)

            # the total is stored on the results and also returned
            self.results['likelihood'] = loglike
            sc.printv(f'Likelihood: {loglike}', 1, verbose)

        return loglike
# Example #12
 def metric_heat(group):
     """Return ``groupfunc`` applied to the ``metric`` entry of ``group``.

     ``metric`` and ``groupfunc`` are not defined in this function; they
     presumably come from an enclosing scope -- confirm against the caller.
     """
     # NOTE(review): the body of this `if` contains only a comment, which is
     # not valid Python; a statement appears to be missing here.
     if all(pylab.isnan(group[metric])):
         #print metric
     return groupfunc(group[metric])
# Example #13
def MutationPerClone(x):
    '''Per-clone mean and STD of the mutation counts from MutationRecords.dat.

    Rows sharing the value in column 0 with an earlier row are dropped, so
    only the first row of each clone is kept (rows containing NaN are dropped
    as well).  The returned array holds:
    [0] - number of cells (rows of the input), [1] - number of clones,
    [2], [3] - mean and STD of point mutations (column 1),
    [4], [5] - mean and STD of duplications (column 2),
    [6], [7] - mean and STD of deletions (column 3),
    [8], [9] - mean and STD of column 4.'''
    n_cells = x.shape[0]
    stats = p.zeros(10)
    stats[0] = n_cells

    # append a flag column (index 5): 0 = keep, NaN = repeat of an earlier clone
    flags = p.zeros((n_cells, 1))
    x = p.concatenate((x, flags), axis=1)
    n_rows = x.shape[0]
    for first in xrange(0, n_rows):
        if x[first, 5] != 0:
            # already marked as a repeat; its clone was handled earlier
            continue
        for later in xrange(first+1, n_rows):
            if x[later, 0] == x[first, 0]:
                x[later, 5] = p.nan

    # keep only rows without any NaN, i.e. one row per clone
    keep = ~p.isnan(x).any(1)
    x = x[keep]
    stats[1] = x.shape[0]

    # columns 1..4 fill slots (2, 3), (4, 5), (6, 7), (8, 9)
    for col in (1, 2, 3, 4):
        stats[2 * col] = x[:, col].mean()
        stats[2 * col + 1] = x[:, col].std()
    return stats
# Example #14
def write_angles():
    # Walks DATASETS_DIR (cell type / species / region / lab / neuron file) and
    # appends one line per (parent, child1, child2) angle to OUTDIR/FNAME.
    # (neuron name, neuron type) pairs already present in that file are
    # collected in prev_neurons.
    # NOTE(review): several lines of this function appear to be missing (see
    # the notes below); as written it is not valid Python.
    prev_neurons = set()
    first_line = False
    if FNAME in os.listdir(OUTDIR):
        df = pd.read_csv('%s/%s' % (OUTDIR, FNAME), skipinitialspace=True)
        neuron_names = list(df['neuron name'])
        neuron_types = list(df['neuron type'])
        prev_neurons = set(zip(neuron_names, neuron_types))
        first_line = True
    i = 0
    with open('%s/%s' % (OUTDIR, FNAME), 'a') as f:
        # NOTE(review): the header string below has lost the call that wrote
        # it.  Also, first_line is only True when the file already exists,
        # which looks inverted for a header -- confirm against upstream.
        if first_line:
                'neuron name, neuron type, parent, child1, child2, angle\n')
        directory = DATASETS_DIR
        for cell_type in os.listdir(directory):
            for species in os.listdir(directory + '/' + cell_type):
                # NOTE(review): the continuation of the next line (the rest of
                # the path and the closing parenthesis) is missing.
                for region in os.listdir(directory + '/' + cell_type + '/' +
                    for lab in os.listdir(directory + "/" + cell_type + '/' +
                                          species + '/' + region):
                        for neuron in os.listdir(directory + "/" + cell_type +
                                                 "/" + species + '/' + region +
                                                 '/' + lab):
                            filename = directory + "/" + cell_type + "/" + species + "/" + region + '/' + lab + '/' + neuron

                            # NOTE(review): empty `if` body; files without the
                            # ".CNG.swc" suffix were presumably skipped here.
                            if neuron[-8:] != ".CNG.swc":

                            # strip the 8-character ".CNG.swc" suffix
                            neuron_name = neuron[:-8]

                            # NOTE(review): the `try:` for this `except` and
                            # the body of the `except` are missing.
                                graphs = get_neuron_points(filename)
                            except AssertionError:

                            # the position of each graph selects its type from NEURON_TYPES
                            for i, G in enumerate(graphs):
                                neuron_type = NEURON_TYPES[i]

                                # NOTE(review): empty `if` body; pairs already
                                # written were presumably skipped here.
                                if (neuron_name, neuron_type) in prev_neurons:
                                prev_neurons.add((neuron_name, neuron_type))

                                # NOTE(review): empty `if` body.
                                if G == None:

                                print neuron_name, neuron_type

                                angles = get_angles(G)

                                # angles maps (parent, child1, child2) to an angle
                                for (parent, child1,
                                     child2), angle in angles.iteritems():
                                    # NOTE(review): empty `if` body; NaN angles
                                    # were presumably skipped here.
                                    if pylab.isnan(angle):
                                    parent = int(parent)
                                    child1 = int(child1)
                                    child2 = int(child2)
                                    f.write('%s, %s, %d, %d, %d, %f\n' %\
                                            (neuron_name, neuron_type, parent, child1, child2, angle))
# Example #15
    def sample(self, model, evidence):
        """Draw a new g series with forward-filter backward-sampling.

        Observations come from ``evidence['z']``; entries where ``T == 0`` are
        set to nan and then masked as missing.  ``self._kalman`` is
        reconfigured in place as a scalar random walk with transition variance
        ``sigma_g**2`` and observation variance ``sigma_z_g**2``.

        Returns whatever ``forward_filter_backward_sample`` returns for that
        filter and the masked observations.
        """
        z = evidence['z']
        T, sigma_g = [evidence[var] for var in ['T', 'sigma_g']]
        sigma_z_g = model.known_params['sigma_z_g']
        prior_mu_g, prior_cov_g = [
            model.hyper_params[var] for var in ['prior_mu_g', 'prior_cov_g']
        ]

        # work on a copy so the evidence array is not modified
        z_g = z.copy()
        z_g[T == 0] = nan
        z_g = ma.asarray(z_g)
        z_g[isnan(z_g)] = ma.masked

        kalman = self._kalman
        kalman.initial_state_mean = prior_mu_g[0]
        kalman.initial_state_covariance = prior_cov_g[0, 0]
        kalman.transition_matrices = 1
        kalman.transition_covariance = sigma_g**2
        kalman.observation_matrices = 1
        kalman.observation_covariance = sigma_z_g**2
        sampled_g = forward_filter_backward_sample(kalman, z_g)

        return sampled_g
# Example #16
def find_unfeasible_concentrations(S, dG0_f, c_range, c_mid=1e-4, T=default_T, bounds=None, log_stream=None):
    """
        Almost the same as find_pCr, but adds a global restriction on the concentrations (for compounds
        that don't have specific bounds in 'bounds').
        After the solution which optimizes the pCr is found, any concentration which does not conform
        to the limits of c_range will be truncated to the closest allowed concentration.
        If at least one concentration needs to be adjusted, then pCr loses its meaning
        and therefore is returned with the value None.

        Returns the tuple (dG_f, concentrations, pCr).
    """
    dG_f, concentrations, pCr = find_pCr(S, dG0_f, c_mid=c_mid, bounds=bounds, log_stream=log_stream)

    for c in range(dG0_f.shape[0]):
        if pylab.isnan(dG0_f[c, 0]):
            continue # unknown dG0_f - therefore the concentration of this compounds is meaningless

        # Only compounds without their own bound are clamped to c_range.
        # The formation energy at the clamped concentration is
        # dG0_f + R*T*ln(c), the same relation add_thermodynamic_constraints uses.
        if ((bounds is None or bounds[c][0] is None) and concentrations[c, 0] < c_range[0]):
            concentrations[c, 0] = c_range[0]
            dG_f[c, 0] = dG0_f[c, 0] + R * T * pylab.log(c_range[0])
            pCr = None
        elif ((bounds is None or bounds[c][1] is None) and concentrations[c, 0] > c_range[1]):
            concentrations[c, 0] = c_range[1]
            dG_f[c, 0] = dG0_f[c, 0] + R * T * pylab.log(c_range[1])
            pCr = None

    return (dG_f, concentrations, pCr)
# Example #17
def timeseriesstd(times, x, xmean=pylab.nan):
    """Time-weighted standard deviation of the samples ``x`` taken at ``times``.

    The squared deviation from ``xmean`` is averaged over each interval
    between consecutive samples (mean of its two end-point values, weighted
    by the interval length), summed, divided by the total time span and
    square-rooted.  When ``xmean`` is NaN (the default) it is computed with
    ``timeseriesmean(times, x)``.
    """
    if pylab.isnan(xmean):
        xmean = timeseriesmean(times, x)

    weighted_sq_dev = 0
    for t_start, t_end, x_start, x_end in zip(times[0:-1], times[1:], x[0:-1], x[1:]):
        interval = t_end - t_start
        weighted_sq_dev = weighted_sq_dev + interval * ((x_start - xmean)**2 + (x_end - xmean)**2) / 2

    return pylab.sqrt(1.0 * weighted_sq_dev / (times[-1] - times[0]))
# Example #18
    def likelihood(self, verbose=None):
        # NOTE(review): the next two lines read like docstring text whose
        # triple-quote delimiters are missing; as written they are not valid
        # Python.
        Compute the log-likelihood of the current simulation based on the number
        of new diagnoses.
        # fall back to the object's own 'verbose' setting
        if verbose is None:
            verbose = self['verbose']

        loglike = 0
        # NOTE(review): the operand of `is not None` and the argument of
        # `len(` are missing in the condition below.
        if is not None and len(
        ):  # Only perform likelihood calculation if data are available
            # NOTE(review): this iterates over the literal list
            # ['new_positives'], so `datum` is a string; presumably a data
            # column was meant -- confirm against the upstream source.
            for d, datum in enumerate(['new_positives']):
                if not pl.isnan(
                        datum):  # Skip days when no tests were performed
                    estimate = self.results['diagnoses'][d]
                    p = cvu.poisson_test(datum, estimate)
                    logp = pl.log(p)
                    loglike += logp
                    # NOTE(review): the two lines below look like the
                    # arguments of a call whose opening is missing.
                        f'  {["date"][d]}, data={datum:3.0f}, model={estimate:3.0f}, log(p)={logp:10.4f}, loglike={loglike:10.4f}',
                        2, verbose)

            # the total is stored on the results and also returned
            self.results['likelihood'] = loglike
            sc.printv(f'Likelihood: {loglike}', 1, verbose)

        return loglike
# Example #19
def setup(dm, key, data_list, rate_stoch):
    """ Generate the PyMC variables for a log-normal model of
    a function of age

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)

    data_list : list of data dicts
      the observed data to use in the log-normal likelihood function

    rate_stoch : pymc.Stochastic
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimate_age_mesh()).

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      log-normal model.  vars['rate_stoch'] is the rate passed in and
      vars['observed_rates'] lists one observed stochastic per datum.
    """
    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    vars['rate_stoch'] = rate_stoch

    # set up priors and observed data
    prior_str = dm.get_priors(key)
    dismod3.utils.generate_prior_potentials(vars, prior_str, est_mesh)

    vars['observed_rates'] = []
    for d in data_list:
        age_indices = dismod3.utils.indices_for_range(est_mesh, d['age_start'], d['age_end'])
        age_weights = d.get('age_weights', pl.ones(len(age_indices)) / len(age_indices))

        # standard error on the log scale, from the 95% bounds; fall back to 1
        # when the bounds do not give a usable value
        lb, ub = dm.bounds_per_1(d)
        se = (pl.log(ub) - pl.log(lb)) / (2. * 1.96)
        if pl.isnan(se) or se <= 0.:
            se = 1.

        log_value = pl.log(dm.value_per_1(d))
        print('data %d: log(value) = %f, se = %f' % (d['id'], log_value, se))

        # The observed log-value is normal around the log of the age-averaged
        # rate.  age_indices/age_weights are bound as defaults so that each
        # stochastic keeps the values of its own datum rather than those of
        # the last loop iteration.
        @mc.stochastic(name='obs_%d' % d['id'], observed=True)
        def obs(value=log_value,
                f=vars['rate_stoch'],
                age_indices=age_indices,
                age_weights=age_weights,
                tau=se**-2, data=d):
            f_i = dismod3.utils.rate_for_range(f, age_indices, age_weights)
            return mc.normal_like(value, pl.log(f_i), tau)
        vars['observed_rates'].append(obs)
    return vars
# Example #20
def load_new_model(disease, country='all', sex=['total', 'male', 'female'], cov='no'):
    '''create disease model with relevant data
    disease : used to build the path '/home/j/Project/dismod/output/dm-<disease>'
    country : str
      area to keep (passed to model.keep as areas=[country])
    sex : str or list of str
      sexes to keep; a single sex other than 'total' is kept together with 'total'
    cov : str
      method to handle covariates
      default is nothing ('no')
      options include,
        - 'drop' : drop all covariates
        - 'zero' : missing values replaced with 0
        - 'average' : missing values replaced with average of column
    # NOTE(review): the closing triple quote of this docstring is missing, and
    # the assignment below has lost the call that loaded the model (only its
    # path argument and closing parenthesis remain).
    model ='/home/j/Project/dismod/output/dm-%s'%disease)
    # keep relevant data
    if (type(sex)==str) & (sex != 'total'): model.keep(areas=[country], sexes=[sex, 'total'])
    else: model.keep(areas=[country], sexes=sex)
    # covariate columns are those whose name contains 'x_'; the chosen method
    # is applied to each one only when some covariate value is missing in the
    # output template or the input data
    if (True in pl.isnan(pl.array(model.output_template.filter(like='x_')))) | (True in pl.isnan(pl.array(model.input_data.filter(like='x_')))): 
        print 'Covariates missing, %s method used'%(cov)
        col = model.input_data.filter(like='x_').columns
        for i in col:
            if cov == 'drop': 
                model.input_data = model.input_data.drop(i,1)
                model.output_template = model.output_template.drop(i,1)
            elif cov == 'zero': 
                model.input_data[i] = model.input_data[i].fillna([0])
                model.output_template[i] = model.output_template[i].fillna([0])
            elif cov == 'average': 
                model.input_data[i] = model.input_data[i].fillna([model.input_data[i].mean()])
                model.output_template[i] = model.output_template[i].fillna(model.output_template[i].mean())
    return model
# Example #21
def get_cod_data_all_causes(iso3='USA', age_group='1_4', sex='F'):
    """Load the cause-fraction draws of every cause for one country/age/sex.

    One CSV file per cause is read from the CoD Correct input directory; the
    cause name is the second '+'-separated part of the file name.  For every
    file, column 'ensemble_d<n>' (n = 1..N) is divided by column 'envelope'.

    Returns (F, cause_list): F is an array of shape (N, T, J) with N = 990,
    T = 32 and J the number of files found; cause_list holds the J cause
    names in the order used along the last axis of F (sorted file names).
    """
    print('loading %s %s %s' % (iso3, age_group, sex))
    import glob

    cause_list = []
    fpath = '/home/j/Project/Causes of Death/Under Five Deaths/CoD Correct Input Data/v02_prep_%s/%s+*+%s+%s.csv' % (
        iso3, iso3, age_group, sex)
    #fpath = '/home/j/Project/GBD/dalynator/data/cod_correct_input_pos/run_9_cause_*.csv'  # use Mike's validation data
    fnames = glob.glob(fpath)

    # initialize input distribution array
    N = 990  # TODO: get this from the data files
    T = 32  # TODO: get this from the data files
    J = len(fnames)
    F = pl.zeros((N, T, J))

    # fill input distribution array with data from files
    for j, fname in enumerate(sorted(fnames)):
        cause = fname.split('+')[1]  # TODO: make this less brittle and clearer
        #cause = str(j) # use Mike's validation data causes
        print('loading cause %s' % cause)
        F_j = pl.csv2rec(fname)

        for n in range(N):
            F[n, :, j] = F_j['ensemble_d%d' % (n + 1)] / F_j['envelope']
            #F[n, :, j] = F_j['d%d'%(n+1)]/F_j['envelope'] # use Mike's validation data

        assert not pl.any(
            pl.isnan(F)), '%s should have no missing values' % fname

        # record the cause so callers can label the last axis of F
        cause_list.append(cause)

    print('loading complete')
    return F, cause_list
# Example #22
def get_cod_data_all_causes(iso3='USA', age_group='1_4', sex='F'):
    """Load the cause-fraction draws of every cause for one country/age/sex.

    One CSV file per cause is read from the CoD Correct input directory; the
    cause name is the second '+'-separated part of the file name.  For every
    file, column 'ensemble_d<n>' (n = 1..N) is divided by column 'envelope'.

    Returns (F, cause_list): F is an array of shape (N, T, J) with N = 990,
    T = 32 and J the number of files found; cause_list holds the J cause
    names in the order used along the last axis of F (sorted file names).
    """
    print('loading %s %s %s' % (iso3, age_group, sex))
    import glob
    cause_list = []
    fpath = '/home/j/Project/Causes of Death/Under Five Deaths/CoD Correct Input Data/v02_prep_%s/%s+*+%s+%s.csv' % (iso3, iso3, age_group, sex)
    #fpath = '/home/j/Project/GBD/dalynator/data/cod_correct_input_pos/run_9_cause_*.csv'  # use Mike's validation data
    fnames = glob.glob(fpath)

    # initialize input distribution array
    N = 990  # TODO: get this from the data files
    T = 32  # TODO: get this from the data files
    J = len(fnames)
    F = pl.zeros((N, T, J))

    # fill input distribution array with data from files
    for j, fname in enumerate(sorted(fnames)):
        cause = fname.split('+')[1]  # TODO: make this less brittle and clearer
        #cause = str(j) # use Mike's validation data causes
        print('loading cause %s' % cause)
        F_j = pl.csv2rec(fname)

        for n in range(N):
            F[n, :, j] = F_j['ensemble_d%d'%(n+1)]/F_j['envelope']
            #F[n, :, j] = F_j['d%d'%(n+1)]/F_j['envelope'] # use Mike's validation data

        assert not pl.any(pl.isnan(F)), '%s should have no missing values' % fname

        # record the cause so callers can label the last axis of F
        cause_list.append(cause)
    print('loading complete')
    return F, cause_list
# Example #23
def test_from_gbd_json():
    # Checks that ModelData.from_gbd_json loads the dismoditis fixture with
    # the expected input data, output template, parameters and hierarchy.
    d = data.ModelData.from_gbd_json('tests/dismoditis.json')

    # input data: enough rows, and every expected column present
    assert len(
        d.input_data) > 17, 'dismoditis model has more than 17 data points'
    # NOTE(review): the `.split(` call on the next line is never closed and
    # the `for` header has no colon -- the line looks truncated; confirm
    # against the upstream file.
    for field in 'data_type value area sex age_start age_end year_start year_end standard_error effective_sample_size lower_ci upper_ci age_weights'.split(
        assert field in d.input_data.columns, 'Input data CSV should have field "%s"' % field
    #assert len(d.input_data.filter(regex='x_').columns) == 1, 'should have added country-level covariates to input data'
    #assert len(d.input_data['x_LDI_id_Updated_7July2011'].dropna().index) > 0

    # output template: enough rows, and every expected column present
    assert len(d.output_template) > 100
    for field in 'area sex year pop'.split():
        assert field in d.output_template.columns, 'Output template CSV should have field "%s"' % field
    #assert len(d.output_template.filter(regex='x_').columns) == 1, 'should have added country-level covariates to output template'
    #assert len(d.output_template['x_LDI_id_Updated_7July2011'].dropna().index) > 0

    # every listed data type must carry every listed prior
    for data_type in 'i p r f rr X'.split():
        # NOTE(review): same unclosed `.split(` / missing colon as above.
        for prior in 'smoothness heterogeneity level_value level_bounds increasing decreasing'.split(
            assert prior in d.parameters[
                data_type], 'Parameters for %s should include prior on %s' % (
                    data_type, prior)

    # hierarchy: CHN is a successor of asia_east and that edge's weight is NaN
    assert 'CHN' in d.hierarchy.successors('asia_east')
    assert pl.isnan(d.hierarchy['asia_east']['CHN'].get('weight'))
    #assert set(d.hierarchy.node['asia_east'].keys()) == set('area sex year_start year_end pop'.split())
    #assert len(d.nodes_to_fit) == 21*3*2 + 1

    import dismod3
    import simplejson as json
    # NOTE(review): the call below is cut off after the opening parenthesis;
    # its arguments (and anything that followed) are not visible here.
    model = data.ModelData.from_gbd_jsons(
Exemple #24
    def _get_angles(steps,track_length):
        """Return the change in heading between consecutive steps.

        For each of the first ``track_length - 1`` rows of ``steps`` a
        heading is computed from ``arctan(steps[i, 0] / steps[i, 1])`` and
        shifted according to the signs of the two components.  The result
        holds the ``track_length - 2`` differences between successive
        headings (radians).
        """
        angles = pl.zeros(track_length-2)
        polar = pl.zeros(pl.shape(steps))
        for i in range(track_length-1):
            polar[i,0] = pl.norm(steps[i,:])
            polar[i,1] = pl.arctan(steps[i,0]/steps[i,1])

            # 0/0 produces NaN; treat a zero-length step as heading 0
            if pl.isnan( polar[i,1]):
                polar[i,1] = 0

            # Quadrant correction.  The branch for both components >= 0
            # had lost its body (a syntax error); it needs no shift.
            if steps[i,0] < 0:
                polar[i,1] += pl.pi
            elif steps[i,0] >= 0 and steps[i,1] < 0:
                polar[i,1] += 2.*pl.pi

        # difference of successive headings
        angles[:] = polar[1:track_length-1,1] - polar[:track_length-2,1]

        return angles
    def sample(self, model, evidence):
        """Sample the latent states with a forward-filter/backward-sample pass.

        Reads 'z', 'T', 'surfaces', 'sigma_g' and 'sigma_h' from ``evidence``,
        'mu_h', 'phi', 'sigma_z_g' and 'sigma_z_h' from ``model.known_params``
        and the prior means/covariances from ``model.hyper_params``, configures
        ``self._kalman`` with them and returns the result of
        ``forward_filter_backward_sample(kalman, y)``.
        """
        z = evidence['z']
        T, surfaces, sigma_g, sigma_h = [evidence[var] for var in ['T', 'surfaces', 'sigma_g', 'sigma_h']]
        mu_h, phi, sigma_z_g, sigma_z_h = [model.known_params[var] for var in ['mu_h', 'phi', 'sigma_z_g', 'sigma_z_h']]
        prior_mu_g, prior_cov_g = [model.hyper_params[var] for var in ['prior_mu_g', 'prior_cov_g']]
        prior_mu_h, prior_cov_h = [model.hyper_params[var] for var in ['prior_mu_h', 'prior_cov_h']]

        # One row of y per observation.  The length used to be taken from an
        # undefined name `g`; the boolean indexing below requires y, T and z
        # to share one length, so len(z) is the consistent choice.
        n = len(z)

        # y[:, 0] carries the observations with T == 1, y[:, 1] those with
        # T == 2; every other entry stays masked (unobserved).
        y = ma.asarray(ones((n, 2))*nan)
        if sum(T==1) > 0:
            y[T==1, 0] = z[T==1]
        if sum(T==2) > 0:
            y[T==2, 1] = z[T==2]
        y[isnan(y)] = ma.masked

        kalman = self._kalman
        kalman.initial_state_mean=[prior_mu_g[0], prior_mu_h[0]]
        kalman.initial_state_covariance=diag([prior_cov_g[0,0], prior_cov_h[0,0]])
        kalman.transition_matrices=[[1, 0], [0, phi]]
        kalman.transition_offsets =ones((n, 2))*[0, mu_h*(1-phi)]
        kalman.transition_covariance=[[sigma_g**2, 0], [0, sigma_h**2]]
        kalman.observation_matrices=[[1, 0], [1, 1]]
        kalman.observation_covariance=[[sigma_z_g**2, 0], [0, sigma_z_h**2]]
        sampled_surfaces = forward_filter_backward_sample(kalman, y)

        return sampled_surfaces
Exemple #26
def getAngle(t1,c1,t2,c2):
    # NOTE(review): the next three lines read like docstring text whose
    # triple-quote delimiters were lost; as written they are not valid code.
    Get angle between two celestials at t1 and t2 
    Verify if ignoring the k-cordinate makes any sense
    timeit 240 microseconds
    # an array-valued t2 is reduced to its first element
    if type(t2) == numpy.ndarray:
            t2 = t2[0]
    elif isnan(t2):
        print "ERROR, t2 is nan!"
        return t2
    # first element of each eph() result, third component zeroed, then
    # scaled to unit length
    p1 = c1.eph(t1)[0]
    p1[2] = 0.0
    p1l = norm(p1)
    p1 /= p1l
    p2 = c2.eph(t2)[0]
    p2[2] = 0.0
    p2l = norm(p2)
    p2 /= p2l
    # NOTE(review): the snippet ends here -- no angle is computed or returned
    # in the visible code; the remainder of the function appears to be missing.
    #if p1l > p2l:
    #    return
Exemple #27
def add_thermodynamic_constraints(cpl,
                                  c_range=(1e-6, 1e-2),
        # NOTE(review): the signature is truncated -- `dG0_f` and `bounds` are
        # used below but never declared -- and the two text lines that follow
        # look like the tail of a docstring whose delimiters were lost.
        For any compound that does not have an explicit bound set by the 'bounds' argument,
        create a bound using the 'margin' variables (the last to columns of A).

    Nc = dG0_f.shape[0]

    if bounds != None and len(bounds) != Nc:
        # NOTE(review): the closing parenthesis of this raise is missing.
        raise Exception(
            "The concentration bounds list must be the same length as the number of compounds"
    if bounds == None:
        bounds = [(None, None)] * Nc

    for c in xrange(Nc):
        if pylab.isnan(dG0_f[c, 0]):
            continue  # unknown dG0_f - cannot bound this compound's concentration at all

        # fall back to the default range when no explicit bound is given
        # (a falsy explicit bound such as 0 also falls back)
        b_low = bounds[c][0] or c_range[0]
        b_high = bounds[c][1] or c_range[1]

        # lower bound: dG0_f + R*T*ln(Cmin) <= x_i
        cpl.variables.set_lower_bounds('c%d' % c,
                                       dG0_f[c, 0] + R * T * pylab.log(b_low))

        # upper bound: x_i <= dG0_f + R*T*ln(Cmax)
        cpl.variables.set_upper_bounds('c%d' % c,
                                       dG0_f[c, 0] + R * T * pylab.log(b_high))
Exemple #28
# NOTE(review): this definition is damaged -- the signature stops after
# `dG0_f,` (c_mid, ratio, bounds and log_stream are used but not declared)
# and the docstring opened below is never closed.
def make_pCr_problem(S, dG0_f,
    """Creates a Cplex problem for finding the pCr.
    Simply sets up all the constraints. Does not set the objective.
        S: stoichiometric matrix.
        dG0_f: deltaG0'-formation values for all compounds (in kJ/mol) (1 x compounds)
        c_mid: the default concentration to center the pCr on.
        ratio: the ratio between the distance of the upper bound from c_mid
            and the lower bound from c_mid (in logarithmic scale)
        bounds: the concentration bounds for metabolites.
        log_stream: where to write Cplex logs to.
        A cplex.Cplex object for the problem.
    Nc = S.shape[1]
    if Nc != dG0_f.shape[0]:
        raise Exception("The S matrix has %d columns, while the dG0_f vector has %d" % (Nc, dG0_f.shape[0]))
    if bounds and len(bounds) != Nc:
        raise Exception("The concentration bounds list must be the same length as the number of compounds")

    cpl = create_cplex(S, dG0_f, log_stream)
    # Add pC variable.
    cpl.variables.add(names=['pC'], lb=[0], ub=[1e6])
    # Add variables for concentration bounds for each metabolite.
    for c in xrange(Nc):
        if pylab.isnan(dG0_f[c, 0]):
            continue # unknown dG0_f - cannot bound this compound's concentration at all

        # dG at the center concentration.
        dG_f_mid = dG0_f[c, 0] + R*T*pylab.log(c_mid)
        if bounds == None or bounds[c][0] == None:
            # lower bound: x_i + r/(1+r) * R*T*ln(10)*pC >= dG0_f + R*T*ln(Cmid) 
            cpl.linear_constraints.add(senses='G', names=['c%d_lower' % c], rhs=[dG_f_mid])
            cpl.linear_constraints.set_coefficients('c%d_lower' % c, 'c%d' % c, 1)
            cpl.linear_constraints.set_coefficients('c%d_lower' % c, 'pC', R*T*pylab.log(10) * ratio / (ratio + 1.0))
            # NOTE(review): an `else:` appears to be missing here -- as written
            # the explicit bound is applied in the branch where none exists.
            # this compound has a specific lower bound on its activity
            cpl.variables.set_lower_bounds('c%d' % c, dG0_f[c, 0] + R*T*pylab.log(bounds[c][0]))

        if bounds == None or bounds[c][1] == None:
            # upper bound: x_i - 1/(1+r) * R*T*ln(10)*pC <= dG0_f + R*T*ln(Cmid)
            cpl.linear_constraints.add(senses='L', names=['c%d_upper' % c], rhs=[dG_f_mid])
            cpl.linear_constraints.set_coefficients('c%d_upper' % c, 'c%d' % c, 1)
            cpl.linear_constraints.set_coefficients('c%d_upper' % c, 'pC', -R*T*pylab.log(10) / (ratio + 1.0))
            # NOTE(review): same missing `else:` as for the lower bound.
            # this compound has a specific upper bound on its activity
            cpl.variables.set_upper_bounds('c%d' % c, dG0_f[c, 0] + R*T*pylab.log(bounds[c][1]))

    return cpl
Exemple #29
def fe(data):
    """ Fixed Effect model::
        Y_r,c,t = beta * X_r,c,t + e_r,c,t
        e_r,c,t ~ N(0, sigma^2)
    # covariates
    K1 = count_covariates(data, 'x')
    X = pl.array([data['x%d' % i] for i in range(K1)])

    K2 = count_covariates(data, 'w')
    W = pl.array([data['w%d' % i] for i in range(K1)])

    # priors
    beta = mc.Uninformative('beta', value=pl.zeros(K1))
    gamma = mc.Uninformative('gamma', value=pl.zeros(K2))
    sigma_e = mc.Uniform('sigma_e', lower=0, upper=1000, value=1)

    # predictions
    def mu(X=X, beta=beta):
        return, X)

    param_predicted = mu

    def sigma_explained(W=W, gamma=gamma):
        """ sigma_explained_i,r,c,t,a = gamma * W_i,r,c,t,a"""
        return, W)

    def predicted(mu=mu, sigma_explained=sigma_explained, sigma_e=sigma_e):
        return mc.rnormal(mu, 1 / (sigma_explained**2. + sigma_e**2.))

    # likelihood
    i_obs = pl.find(1 - pl.isnan(data.y))

    def obs(value=data.y,
        return mc.normal_like(value[i_obs], mu[i_obs],
                              1. / (sigma_explained[i_obs]**2. + sigma_e**-2.))

    # set up MCMC step methods
    mod_mc = mc.MCMC(vars())
    mod_mc.use_step_method(mc.AdaptiveMetropolis, mod_mc.beta)

    # find good initial conditions with MAP approx
    print 'attempting to maximize likelihood'
    var_list = [mod_mc.beta, mod_mc.obs, mod_mc.sigma_e]
    mc.MAP(var_list).fit(method='fmin_powell', verbose=1)

    return mod_mc
 def clean_estpoints(self):
     """Removes NaN from the estpoints that result from cleaning by the user
     in the extractfitpoints method.

     Every row of self.estpoints whose second element is NaN is dropped and
     self.estpoints is replaced by an array of the remaining rows.
     """
     # The removal loop had lost its body; a filter expresses the same
     # intent without index bookkeeping.
     kept = [row for row in self.estpoints.tolist() if not pl.isnan(row[1])]
     self.estpoints = pl.array(kept)
Exemple #31
def apply_mask(x):
    """
    Gets arrays with NaN from MAT files and applies python masked_where

    NaN entries of ``x`` are overwritten with 0 (in place), then every
    entry equal to 0 is masked.  Entries that were already 0 are therefore
    masked as well.  Returns the masked array.
    """
    x[pl.isnan(x)] = 0
    return pl.ma.masked_where(x == 0, x)
Exemple #32
def maskOD(data):
    '''Mask too large/small values for plots.

    data is a two-level mapping whose innermost values are arrays.  Entries
    above 2 or below 0.01 are overwritten in place with None, and a message
    is written to stderr for every array whose sum is NaN afterwards.
    Returns the same data object.
    '''
    template = 'Masking value with "nan" in {} -- {}'
    for outer_key, inner in data.items():
        for inner_key, values in inner.items():
            out_of_range = (values > 2) | (values < 0.01)
            values[out_of_range] = None
            # TODO: Report masks when they occur
            if py.isnan(py.sum(values)):
                print(template.format(outer_key, inner_key), file=sys.stderr)
    return data
Exemple #33
def resample(data):
    if len(data) == 0:
        return data

    delta_true = .1
    p = data['mu_pred'] + 1.e-6

    # TODO: abstract this block of code into; it is also called in
    ## ensure that all data has uncertainty quantified appropriately
    # first replace all missing se from ci
    missing_se = pl.isnan(
        data['standard_error']) | (data['standard_error'] <= 0)
    data['standard_error'][missing_se] = (
        data['upper_ci'][missing_se] - data['lower_ci'][missing_se]) / (2 *

    # then replace all missing ess with se
    missing_ess = pl.isnan(data['effective_sample_size'])
    data['effective_sample_size'][missing_ess] = data['value'][missing_ess] * (
        1 -
        data['value'][missing_ess]) / data['standard_error'][missing_ess]**2

    # warn and drop data that doesn't have effective sample size quantified, or is is non-positive
    missing_ess = pl.isnan(
        data['effective_sample_size']) | (data['effective_sample_size'] < 0)
    if sum(missing_ess) > 0:
        print 'WARNING: %d rows of data has invalid quantification of uncertainty.' % sum(
        data['effective_sample_size'][missing_ess] = 1.0

    n = data['effective_sample_size']

    data['true'] = p
    data['value'] = (1.0 *
                     mc.rnegative_binomial(n * p, delta_true * n * p)) / n

    # uncomment below to test the effect of having very wrong data
    #data['value'] = 0.
    #data['effective_sample_size'] = 1.e6

    return data
Exemple #34
def loadFile(objectFileName):
    # Builds one {'object', 'sky', 'error'} dict of astSED.SED spectra per
    # fiber of the file and returns them as a list.
    # NOTE(review): the assignment below has lost its right-hand side (the
    # call that opens objectFileName); which library opened it is not
    # visible here.
    oimg =

    # Load the IFU data -- Row-stacked spectra
    odata = oimg[1].data
    oError = oimg[2].data
    odata_dim = odata.shape
    wcs = astWCS.WCS(objectFileName, extensionName=1)
    owavelengthStartEnd = wcs.getImageMinMaxWCSCoords()[0:2]
    fiberNumber = wcs.getImageMinMaxWCSCoords()[2:4]
    owavelengthStep = oimg[1].header['CDELT1']

    # wavelength of every pixel along the dispersion axis
    owavelengthRange = [owavelengthStartEnd[0] + i * owavelengthStep
                        for i in range(odata_dim[1])]

    # Check to make sure we got it right
    # NOTE(review): as written the median sky is only computed when the
    # check FAILS, and nothing exits despite the message; an exit call and
    # an `else:` appear to be missing -- confirm against the original.
    if not owavelengthRange[-1] == owavelengthStartEnd[-1]:
        print 'The ending wavelenghts do not match... Exiting'
        # make median sky
        specs = pyl.array([flux for flux in odata])
        skySpec = pyl.median(specs, axis=0)

    RSS = []
    for i in range(int(fiberNumber[1])):
        #oflux = odata[i] - oskyflux
        # sky-subtract the fiber and zero any NaN pixels
        oflux = odata[i] - skySpec
        oflux[pyl.isnan(oflux)] = 0.0
        oErrorFlux = oError[i]
        #oflux = odata[i]

        # Mask out extreme values in spectrum
        # Just because edges dodgy in efosc
        med = pyl.median(oflux)
        oflux[pyl.greater(abs(oflux), 10.0 * med)] = 0.0001

        objSED = astSED.SED(wavelength=owavelengthRange, flux=oflux)
        #skySED = astSED.SED(wavelength=owavelengthRange, flux=oskyflux)
        skySED = astSED.SED(wavelength=owavelengthRange, flux=skySpec)
        errSED = astSED.SED(wavelength=owavelengthRange, flux=oErrorFlux)

        #  make it > 0 everywhere
        # (each spectrum is shifted to a minimum of 0, then scaled to a
        # maximum of 1)
        objSED.flux = objSED.flux - objSED.flux.min()
        objSED.flux = objSED.flux / objSED.flux.max()
        errSED.flux = errSED.flux - errSED.flux.min()
        errSED.flux = errSED.flux / errSED.flux.max()
        skySED.flux = skySED.flux - skySED.flux.min()
        skySED.flux = skySED.flux / skySED.flux.max()

        RSS.append({'object': objSED, 'sky': skySED, 'error': errSED})

    return RSS
Exemple #35
def renormalize(x_unpurt,x_puturb,epsilon):
    # Returns a 4-element state whose components 0 and 2 lie on the line from
    # x_unpurt towards x_puturb, scaled by epsilon / distance(...);
    # components 1 and 3 keep the fixed values 0.0 and 1.0.
    final_dist = distance(x_unpurt,x_puturb)
    xnew = pl.array([0.0,0.0,0.0,1.0])
    # the new renormalized vx (see lab book #2 pg 61)
    xnew[0] = x_unpurt[0]+(epsilon/final_dist)*(x_puturb[0]-x_unpurt[0])
    xnew[2] = x_unpurt[2]+(epsilon/final_dist)*(x_puturb[2]-x_unpurt[2])

    # NOTE(review): the body of this `if` is missing, which is a syntax
    # error; what was done on a NaN result is not visible here.
    if pl.isnan(xnew[0]):

    return xnew
Exemple #36
    def CreateFromAliFile(self):
        # Accumulates per-dimension sums and sums of squares over all files in
        # self.RawFileList (and per speaker when self.Utt2Speaker is set),
        # then derives the mean and standard-deviation vectors from them.
        printStr = ''
        self._lst_ignored_files = []
        self.NumFrames = 0
        created_means = False
        for index, (file_name, utterance_id) in \
                enumerate(zip(self.RawFileList, self.UtteranceIds)):
            # progress display: backspace over the previous message
            printStrNew = '\b' * (len(printStr)+1)
            printStr = "Loading data for utterance #: " + str(index+1)
            printString = printStrNew + printStr
            print printString,

            data = HTK.ReadHTKWithDeltas(file_name)
            # NOTE(review): the body of this `if` is missing; nothing visible
            # ever adds to self._lst_ignored_files, which is reported below.
            if sum(isnan(data)) != 0 or sum(isinf(data)) != 0:

            # NOTE(review): self.DataSum / self.DataSumSq are accumulated below
            # but their initialisation is not visible in this block.
            if not created_means:
                created_means = True
                self.data_dim = data.shape[0]

            self.DataSumSq += (data**2).sum(axis=1).reshape(-1,1)
            self.DataSum += data.sum(axis=1).reshape(-1,1)
            self.NumFrames += data.shape[1]

            if self.Utt2Speaker != None:
                speaker = self.Utt2Speaker[utterance_id]
                self.SpeakerMeans[speaker] += data.sum(axis=1).reshape(-1,1)
                self.SpeakerStds[speaker] += (data**2).sum(axis=1).reshape(-1,1)
                self.SpeakerNumFrames[speaker] += data.shape[1]

        for file_num in self._lst_ignored_files:
            sys.stdout.write("File # " + str(file_num) + " was ignored \
                                   because of errors\n")

        if self.Utt2Speaker != None:
            for speaker in self.Speaker2Utt.keys():
                self.SpeakerMeans[speaker] /= (1.0 *self.SpeakerNumFrames[speaker])
                # NOTE(review): the continuation of the next statement is
                # missing -- the backslash runs straight into the following
                # assignment.
                self.SpeakerStds[speaker] -= self.SpeakerNumFrames[speaker] * \
                self.SpeakerStds[speaker] /= (1.0 *self.SpeakerNumFrames[speaker]-1)
                # floor before the square root
                self.SpeakerStds[speaker][self.SpeakerStds[speaker] < 1e-8] = 1e-8
                self.SpeakerStds[speaker] = sqrt(self.SpeakerStds[speaker])

        # global mean, and variance with an (N - 1) denominator, floored at 1e-8
        self.DataMeanVect = self.DataSum/self.NumFrames
        variances = (self.DataSumSq - self.NumFrames*(self.DataMeanVect**2))/(self.NumFrames-1)
        variances[variances < 1e-8] = 1e-8
        self.DataStdVect = sqrt(variances)
Exemple #37
# NOTE(review): this definition is damaged -- the signature is cut off after
# the `sex` default (`country` and `cov` are used but not declared), the
# docstring is never closed, the `model =` assignment has lost the start of
# its call, and several calls further down are missing their closing part.
def load_new_model(disease,
                   sex=['total', 'male', 'female'],
    '''create disease model with relavtive data
    cov : str
      method to handle covariates
      default is nothing ('no')
      options include, 
        - 'drop' : drop all covartiates
        - 'zero' : missing values replaced with 0
        - 'average' : missing values replaced with average of column
    model ='/home/j/Project/dismod/output/dm-%s' % disease)
    # keep relative data
    # NOTE(review): an `else:` appears to be missing between the two keep()
    # calls; as written both run for a single non-'total' sex.
    if (type(sex) == str) & (sex != 'total'):
        model.keep(areas=[country], sexes=[sex, 'total'])
        model.keep(areas=[country], sexes=sex)

    # NOTE(review): the second isnan( call of this condition is cut off.
    if (True in pl.isnan(pl.array(
            model.output_template.filter(like='x_')))) | (True in pl.isnan(
        print 'Covariates missing, %s method used' % (cov)
        col = model.input_data.filter(like='x_').columns
        for i in col:
            if cov == 'drop':
                model.input_data = model.input_data.drop(i, 1)
                model.output_template = model.output_template.drop(i, 1)
            elif cov == 'zero':
                model.input_data[i] = model.input_data[i].fillna([0])
                model.output_template[i] = model.output_template[i].fillna([0])
            elif cov == 'average':
                # NOTE(review): both fillna( calls below have lost their
                # argument and closing parenthesis.
                model.input_data[i] = model.input_data[i].fillna(
                model.output_template[i] = model.output_template[i].fillna(

    return model
Exemple #38
def runtimes_stats():
    df = pd.read_csv('test_runtimes.csv', skipinitialspace=True)
    print "total trials"
    print len(df['algorithm']) / len(df['algorithm'].unique())

    ratios = []
    labels = []
    weights = []
    hist_algorithms = ['prim', 'khuller']
    algorithm_labels = {'prim': 'Karger', 'khuller': 'Khuller'}


    for algorithm, group in df.groupby('algorithm'):
        print algorithm
        comparisons = group['comparisons'].sum()
        dominated = group['dominated'].sum()
        print float(dominated) / float(
            comparisons), "(", dominated, "/", comparisons, ")"
        print binom_test(dominated, comparisons)
        group = group.groupby('points', as_index=False).agg(pylab.mean)
        pylab.plot(group['points'], group['runtime'], label=algorithm)
        ratio = group['cost ratio']
        ratio = ratio[~pylab.isnan(ratio)]
        ratio = ratio - 1
        print "cost comparisons", len(ratio)
        print "cost ratio", pylab.mean(ratio), "+/-", pylab.std(ratio, ddof=1)

        if algorithm in hist_algorithms:
            weight = pylab.ones_like(ratio) / float(len(ratio))

    pylab.xlabel('number of points')
    pylab.ylabel('rumtime (minutes)')
    pylab.savefig('test_runtimes/runtimes.pdf', format='pdf')

    pylab.hist(ratios, label=labels, weights=weights)
    pylab.xlabel('percent better/worse than Steiner', size=20)
    pylab.ylabel('proportion', size=20)
    ax = pylab.gca()
    pylab.setp(ax.get_legend().get_texts(), fontsize=20)  # for legend text
    pylab.savefig('test_runtimes/cost_ratios_hist.pdf', format='pdf')
Exemple #39
def crunchZfile(f, aCol, sCol, bCol, normFactor):
    r"""
    Takes a zAveraged... data file generated from the crunchData
    function of this library and produces the arithmetic mean
    as well as the standard error from all seeds.  The error
    is done through the propagation of errors as:
    e = sqrt{ \sum_k (c_k e_k)^2 } where e_k are the individual
    seed's standard errors and c_k are the weighting coefficients
    obeying \sum_k c_k = 1.

    f is a comma-delimited file (name or file-like object); aCol, sCol and
    bCol are the column indices of the averages, standard errors and bin
    counts.  Both outputs are multiplied by normFactor.  Returns
    (avg, stdErr).
    """
    avgs, stds, bins = pl.genfromtxt(f,
                                     usecols=(aCol, sCol, bCol),
                                     unpack=True,
                                     delimiter=',')

    # Drop rows in which any of the three values is not a number.  One
    # shared mask keeps the columns aligned; filtering each column on its
    # own can pair an average with the wrong weight.
    valid = ~(pl.isnan(avgs) | pl.isnan(stds) | pl.isnan(bins))
    avgs, stds, bins = avgs[valid], stds[valid], bins[valid]

    # weighting coefficients c_k, proportional to the bin counts
    weights = bins / pl.sum(bins)

    avg = pl.sum(weights * avgs * normFactor)
    # over-estimates error bars
    stdErr = pl.sum((weights * stds * normFactor)**2)**0.5

    return avg, stdErr
Exemple #40
  def identify_nans(self, data, fn):
    """
    private method to identify rows and columns of all nans from grids. This
    happens when the data from multiple GIS databases don't quite align on
    whatever the desired grid is.

    Updates self.good_x / self.good_y (columns / rows that are not entirely
    NaN in any grid seen so far) and sets self.rem_nans when either mask
    shrinks.  <fn> is only used in the warning text.
    """
    good_x = ~all(isnan(data), axis=0) & self.good_x  # good cols
    good_y = ~all(isnan(data), axis=1) & self.good_y  # good rows

    # good_x is indexed by column and good_y by row, so the warnings name
    # them accordingly (the labels used to be swapped).
    if any(good_x != self.good_x):
      total_nan_x = sum(good_x == False)
      self.rem_nans = True
      s =  "Warning: %d col(s) of \"%s\" are entirely NaN." % (total_nan_x, fn)
      print_text(s, self.color)

    if any(good_y != self.good_y):
      total_nan_y = sum(good_y == False)
      self.rem_nans = True
      s = "Warning: %d row(s) of \"%s\" are entirely NaN." % (total_nan_y, fn)
      print_text(s, self.color)

    self.good_x = good_x
    self.good_y = good_y
  def identify_nans(self, data, fn):
    private method to identify rows and columns of all nans from grids. This 
    happens when the data from multiple GIS databases don't quite align on 
    whatever the desired grid is.
    #print "::: DataInput identifying NaNs for %s :::" % fn

    good_x = ~all(isnan(data), axis=0) & self.good_x  # good cols
    good_y = ~all(isnan(data), axis=1) & self.good_y  # good rows
    if any(good_x != self.good_x):
      total_nan_x = sum(good_x == False)
      self.rem_nans = True
      print "Warning: %d row(s) of \"%s\" are entirely NaN." % (total_nan_x, fn)

    if any(good_y != self.good_y):
      total_nan_y = sum(good_y == False)
      self.rem_nans = True
      print "Warning: %d col(s) of \"%s\" are entirely NaN." % (total_nan_y, fn)
    self.good_x = good_x
    self.good_y = good_y
Exemple #42
def wrap_to_pi(angle):
    """Wrap an angle in radians into the interval (-pi, pi].

    Accepts a scalar, a list (converted to a float array) or an ndarray.
    An ndarray argument is modified in place and returned; NaN entries are
    left as NaN.
    """
    if isinstance(angle, list):
        angle = array(angle, dtype=float)
    angle %= (2 * pi)
    if isinstance(angle, ndarray):
        # compare only the non-NaN entries
        valid = ~pylab.isnan(angle)
        out_of_bounds = pylab.zeros(angle.shape, dtype=bool)
        out_of_bounds[valid] = (angle[valid] > pi)
        angle[out_of_bounds] -= (2 * pi)
        return angle
    # scalar input: this branch used to sit unreachable behind the array
    # return, so scalars came back as None
    if angle > pi:
        angle -= (2 * pi)
    return angle
Exemple #43
def fit_quality(time, parameters, noise, repetitions):
    # NOTE(review): the next two lines read like docstring text whose
    # triple-quote delimiters were lost.
    Apply the fitting routine a number of times, as given by
    `repetitions`, and return informations about the fit performance.
    results = []
    errors = []

    from numpy.random import seed

    alpha_psp = AlphaPSP()

    for _ in range(repetitions):

        value = noisy_psp(time=time, noise=noise, **parameters)
        # NOTE(review): arguments between `alpha_psp,` and the keyword below
        # appear to be missing from this call.
        fit_result = fit(alpha_psp,
                         fail_on_negative_cov=[True, True, True, False, False])
        # NOTE(review): nothing visible ever appends to `results` or `errors`,
        # and "fit failed:" is printed even after a good fit; the append
        # lines and an `else:` appear to be missing.
        if fit_result is not None:
            result, error, chi2, success = fit_result
            if chi2 < 1.5 and success:
                print(chi2, result)
            print("fit failed:", end=' ')

    keys = alpha_psp.parameter_names()

    # one list of fitted values / one list of errors per parameter name
    result_dict = dict(((key, []) for key in keys))
    error_dict = dict(((key, []) for key in keys))

    # NOTE(review): the body of the inner loop below is missing.
    for result in results:
        for r, key in zip(result, keys):

    # iterates the diagonal of each error matrix; the visible code only
    # reports entries whose square root is NaN
    for error in errors:
        for r, key in zip(p.diag(error), keys):
            if p.isnan(p.sqrt(r)):
                print("+++++++", r)

    # (means, standard deviations, number of kept fits, parameter names,
    #  per-parameter results, per-parameter errors)
    return ([p.mean(result_dict[key])
             for key in keys], [p.std(result_dict[key]) for key in keys],
            len(results), keys, [result_dict[key] for key in keys],
            [error_dict[key] for key in keys])
Exemple #44
 def from_goal_msg(goal_msg):
     # Builds a FootGoal from goal_msg.
     # NOTE(review): several closing brackets and keyword names of the two
     # calls below are missing, so the argument structure of FootGoal(...)
     # cannot be read from this block.
     rpy = quat2rpy([
         goal_msg.pos.rotation.w, goal_msg.pos.rotation.x,
         goal_msg.pos.rotation.y, goal_msg.pos.rotation.z
     goal = FootGoal(pos=pl.hstack([
         goal_msg.pos.translation.x, goal_msg.pos.translation.y,
         goal_msg.pos.translation.z, rpy
                         goal_msg.fixed_x, goal_msg.fixed_y,
                         goal_msg.fixed_z, goal_msg.fixed_roll,
                         goal_msg.fixed_pitch, goal_msg.fixed_yaw
                         goal_msg.terrain_path_dist, goal_msg.terrain_height
     # NaN is rejected in elements 0, 1 and 5 of goal.pos
     # NOTE(review): the zero-fill line below follows the raise at the same
     # indentation and is unreachable as written; an `else:` appears to be
     # missing.
     if any(pl.isnan(goal.pos[[0, 1, 5]])):
         raise ValueError("I don't know how to handle NaN in x, y, or yaw")
         goal.pos[pl.find(pl.isnan(goal.pos))] = 0
     return goal
Exemple #45
def crunchZfile(f,aCol,sCol,bCol,normFactor):
    r"""
    Takes a zAveraged... data file generated from the crunchData
    function of this library and produces the arithmetic mean
    as well as the standard error from all seeds.  The error
    is done through the propagation of errors as:
    e = sqrt{ \sum_k (c_k e_k)^2 } where e_k are the individual
    seed's standard errors and c_k are the weighting coefficients
    obeying \sum_k c_k = 1.

    f is a comma-delimited file (name or file-like object); aCol, sCol and
    bCol are the column indices of the averages, standard errors and bin
    counts.  Both outputs are multiplied by normFactor.  Returns
    (avg, stdErr).
    """
    avgs,stds,bins = pl.genfromtxt(f, usecols=(aCol, sCol, bCol),
            unpack=True, delimiter=',')

    # Drop rows in which any of the three values is not a number.  One
    # shared mask keeps the columns aligned; filtering each column on its
    # own can pair an average with the wrong weight.
    valid = ~(pl.isnan(avgs) | pl.isnan(stds) | pl.isnan(bins))
    avgs, stds, bins = avgs[valid], stds[valid], bins[valid]

    # weighting coefficients c_k, proportional to the bin counts
    weights = bins/pl.sum(bins)

    avg = pl.sum(weights * avgs * normFactor)
    # over-estimates error bars
    stdErr = pl.sum((weights * stds * normFactor)**2)**0.5

    return avg, stdErr
Exemple #46
 def from_footstep_msg(goal_msg):
     # Builds a FootGoal from goal_msg: position and roll/pitch/yaw stacked
     # into `pos`, terrain path distance and height stacked into `terrain_pts`.
     # NOTE(review): the closing parentheses of quat2rpy( and FootGoal( are
     # missing.
     rpy = quat2rpy(
         [goal_msg.pos.rotation.w, goal_msg.pos.rotation.x, goal_msg.pos.rotation.y, goal_msg.pos.rotation.z]
     goal = FootGoal(
         pos=pl.hstack([goal_msg.pos.translation.x, goal_msg.pos.translation.y, goal_msg.pos.translation.z, rpy]),
         terrain_pts=pl.vstack([goal_msg.terrain_path_dist, goal_msg.terrain_height]),
     # NaN is rejected in elements 0, 1 and 5 of goal.pos
     # NOTE(review): the zero-fill line below follows the raise at the same
     # indentation and is unreachable as written; an `else:` appears to be
     # missing.
     if any(pl.isnan(goal.pos[[0, 1, 5]])):
         raise ValueError("I don't know how to handle NaN in x, y, or yaw")
         goal.pos[pl.find(pl.isnan(goal.pos))] = 0
     return goal
Exemple #47
  def one_ci(v, ci, bootstraps):
    """Bootstrap the median of v and its confidence bounds.

    NaN entries of v are ignored.  Returns (med, lower, upper), where med
    is the median of the bootstrapped medians and lower / upper are the
    distances from med down to the ci/2 quantile and up to the 1 - ci/2
    quantile of the bootstrapped medians.  Returns (nan, 0, 0) when no
    non-NaN value is left.
    """
    v = pylab.array(v)
    v = v[~pylab.isnan(v)]
    if v.size == 0:
      return pylab.nan, 0, 0 #Nothing to compute

    # one column of resampled indices per bootstrap replicate
    r = pylab.randint(v.size, size=(v.size, bootstraps))
    booted_samp = pylab.median(v[r], axis=0)
    # the quantile lookups below are only meaningful on sorted samples
    booted_samp.sort()

    med = pylab.median(booted_samp)
    idx_lo = int(bootstraps * ci/2.0)
    # clamp so that a very small ci cannot index past the last sample
    idx_hi = min(int(bootstraps * (1.0-ci/2)), bootstraps - 1)

    return med, med-booted_samp[idx_lo], booted_samp[idx_hi]-med
Exemple #48
def fe(data):
    """ Fixed Effect model::
        Y_r,c,t = beta * X_r,c,t + e_r,c,t
        e_r,c,t ~ N(0, sigma^2)
    # covariates
    K1 = count_covariates(data, 'x')
    X = pl.array([data['x%d'%i] for i in range(K1)])

    K2 = count_covariates(data, 'w')
    W = pl.array([data['w%d'%i] for i in range(K1)])

    # priors
    beta = mc.Uninformative('beta', value=pl.zeros(K1))
    gamma = mc.Uninformative('gamma', value=pl.zeros(K2))
    sigma_e = mc.Uniform('sigma_e', lower=0, upper=1000, value=1)
    # predictions
    def mu(X=X, beta=beta):
        return, X)
    param_predicted = mu
    def sigma_explained(W=W, gamma=gamma):
        """ sigma_explained_i,r,c,t,a = gamma * W_i,r,c,t,a"""
        return, W)

    def predicted(mu=mu, sigma_explained=sigma_explained, sigma_e=sigma_e):
        return mc.rnormal(mu, 1 / (sigma_explained**2. + sigma_e**2.))

    # likelihood
    i_obs = pl.find(1 - pl.isnan(data.y))
    def obs(value=data.y, i_obs=i_obs, mu=mu, sigma_explained=sigma_explained, sigma_e=sigma_e):
        return mc.normal_like(value[i_obs], mu[i_obs], 1. / (sigma_explained[i_obs]**2. + sigma_e**-2.))

    # set up MCMC step methods
    mod_mc = mc.MCMC(vars())
    mod_mc.use_step_method(mc.AdaptiveMetropolis, mod_mc.beta)

    # find good initial conditions with MAP approx
    print 'attempting to maximize likelihood'
    var_list = [mod_mc.beta, mod_mc.obs, mod_mc.sigma_e]
    mc.MAP(var_list).fit(method='fmin_powell', verbose=1)

    return mod_mc
Exemple #49
    def one_ci(v, ci, bootstraps):
        """Bootstrap the median of v and its confidence bounds.

        NaN entries of v are ignored.  Returns (med, lower, upper), where
        med is the median of the bootstrapped medians and lower / upper are
        the distances from med down to the ci/2 quantile and up to the
        1 - ci/2 quantile of the bootstrapped medians.  Returns (nan, 0, 0)
        when no non-NaN value is left.
        """
        v = pylab.array(v)
        v = v[~pylab.isnan(v)]
        if v.size == 0:
            return pylab.nan, 0, 0  #Nothing to compute

        # one column of resampled indices per bootstrap replicate
        r = pylab.randint(v.size, size=(v.size, bootstraps))
        booted_samp = pylab.median(v[r], axis=0)
        # the quantile lookups below are only meaningful on sorted samples
        booted_samp.sort()

        med = pylab.median(booted_samp)
        idx_lo = int(bootstraps * ci / 2.0)
        # clamp so that a very small ci cannot index past the last sample
        idx_hi = min(int(bootstraps * (1.0 - ci / 2)), bootstraps - 1)

        return med, med - booted_samp[idx_lo], booted_samp[idx_hi] - med
Exemple #50
def wrap_to_2pi(angle):
    """Wrap an angle in radians into the interval [0, 2*pi).

    Accepts a scalar, a list (converted to an array) or an ndarray.  An
    ndarray argument is modified in place and returned; NaN entries are
    left as NaN.
    """
    if isinstance(angle, list):
        angle = array(angle)
    if isinstance(angle, ndarray):
        # touch only the non-NaN entries that actually need wrapping
        valid = ~pylab.isnan(angle)
        out_of_bounds = pylab.zeros(angle.shape, dtype=bool)
        out_of_bounds[valid] = (angle[valid] < 0) | (angle[valid] >= 2 * pi)
        angle[out_of_bounds] %= (2 * pi)
        return angle
    # Scalar input (Python or numpy scalar).  The old fallback tried item
    # assignment on the scalar and, with its `else:` lost, evaluated an
    # array-valued `if` for ndarray input.
    if (angle < 0) | (angle >= 2 * pi):
        angle %= (2 * pi)
    return angle
Exemple #51
    def forward(self, xs):
        """Perform forward propagation of activations and update the
        internal state for a subsequent call to `backward`.
        Since this performs sequence classification, `xs` is a 2D
        array, with rows representing input vectors at each time step.
        Returns a 2D array whose rows represent output vectors for
        each input vector.

        Raises ocrolib.RecognitionError when `xs` has more rows than the
        preallocated per-step buffers can hold."""
        ni, ns, na = self.dims
        assert len(xs[0]) == ni
        n = len(xs)
        # self.last_n = n
        # NOTE(review): the attribute names self.gi / self.gf / self.ci were
        # stripped from this copy and are reconstructed from the
        # gix->gi, gfx->gf, cix->ci, gox->go naming pattern -- confirm.
        N = len(self.gi)
        if n > N:
            raise ocrolib.RecognitionError("input too large for LSTM model")

        # Both functions are a straightforward implementation of the
        # LSTM equations. It is possible to abstract this further and
        # represent gates and memory cells as individual data structures.
        # However, that is several times slower and the extra abstraction
        # isn't actually all that useful.

        # Perform forward propagation of activations for a simple LSTM layer.
        for t in range(n):
            # source row = [bias 1, input x_t, previous output]
            prev = zeros(ns) if t == 0 else self.output[t - 1]
            self.source[t, 0] = 1
            self.source[t, 1 : 1 + ni] = xs[t]
            self.source[t, 1 + ni :] = prev
            self.gix[t] = dot(self.WGI, self.source[t])
            self.gfx[t] = dot(self.WGF, self.source[t])
            self.gox[t] = dot(self.WGO, self.source[t])
            self.cix[t] = dot(self.WCI, self.source[t])
            if t > 0:
                # peephole terms from the previous cell state
                self.gix[t] += self.WIP * self.state[t - 1]
                self.gfx[t] += self.WFP * self.state[t - 1]
            self.gi[t] = ffunc(self.gix[t])
            self.gf[t] = ffunc(self.gfx[t])
            self.ci[t] = gfunc(self.cix[t])
            self.state[t] = self.ci[t] * self.gi[t]
            if t > 0:
                self.state[t] += self.gf[t] * self.state[t - 1]
                self.gox[t] += self.WOP * self.state[t]
            self.go[t] = ffunc(self.gox[t])
            self.output[t] = hfunc(self.state[t]) * self.go[t]

        assert not isnan(self.output[:n]).any()
        return self.output[:n]
Exemple #52
    def forward(self, xs):
        """Perform forward propagation of activations and update the
        internal state for a subsequent call to `backward`.
        Since this performs sequence classification, `xs` is a 2D
        array, with rows representing input vectors at each time step.
        Returns a 2D array whose rows represent output vectors for
        each input vector.

        Raises ocrolib.RecognitionError when `xs` has more rows than the
        preallocated per-step buffers can hold."""
        ni, ns, na = self.dims
        assert len(xs[0]) == ni
        n = len(xs)
        # self.last_n = n
        # NOTE(review): the attribute names self.gi / self.gf / self.ci were
        # stripped from this copy and are reconstructed from the
        # gix->gi, gfx->gf, cix->ci, gox->go naming pattern -- confirm.
        N = len(self.gi)
        if n > N:
            raise ocrolib.RecognitionError("input too large for LSTM model")

        # Both functions are a straightforward implementation of the
        # LSTM equations. It is possible to abstract this further and
        # represent gates and memory cells as individual data structures.
        # However, that is several times slower and the extra abstraction
        # isn't actually all that useful.
        # Perform forward propagation of activations for a simple LSTM layer.
        for t in range(n):
            # source row = [bias 1, input x_t, previous output]
            prev = zeros(ns) if t == 0 else self.output[t - 1]
            self.source[t, 0] = 1
            self.source[t, 1:1 + ni] = xs[t]
            self.source[t, 1 + ni:] = prev
            self.gix[t] = dot(self.WGI, self.source[t])
            self.gfx[t] = dot(self.WGF, self.source[t])
            self.gox[t] = dot(self.WGO, self.source[t])
            self.cix[t] = dot(self.WCI, self.source[t])
            if t > 0:
                # peephole terms from the previous cell state
                self.gix[t] += self.WIP * self.state[t - 1]
                self.gfx[t] += self.WFP * self.state[t - 1]
            self.gi[t] = ffunc(self.gix[t])
            self.gf[t] = ffunc(self.gfx[t])
            self.ci[t] = gfunc(self.cix[t])
            self.state[t] = self.ci[t] * self.gi[t]
            if t > 0:
                self.state[t] += self.gf[t] * self.state[t - 1]
                self.gox[t] += self.WOP * self.state[t]
            self.go[t] = ffunc(self.gox[t])
            self.output[t] = hfunc(self.state[t]) * self.go[t]

        assert not isnan(self.output[:n]).any()
        return self.output[:n]
def fit_map(hit_map, signal_map, guess_center, guess_fwhm):
    """Fit the ``normal_2d`` model to a HEALPix signal map around a guess.

    Pixels are selected with ``_get_close_pixels`` inside a radius of
    ``3 * guess_fwhm`` around ``guess_center``; the mask handed to that
    helper flags pixels with zero hits or a NaN signal.  The model is then
    fitted to the signal in the selected pixels with ``curve_fit``.

    Parameters
    ----------
    hit_map : array
        Per-pixel hit counts; its ``size`` determines ``nside``.
    signal_map : array
        Per-pixel signal values, same pixelisation as ``hit_map``.
    guess_center : sequence of two floats
        Initial ``(lon, lat)`` of the source, in radians (it is compared
        directly with angles derived from ``healpy.pix2ang``).  Any
        sequence is accepted (list, tuple, array).
    guess_fwhm : float
        Initial full width at half maximum; also sets the fit radius.

    Returns
    -------
    The value returned by ``curve_fit`` (for ``scipy.optimize.curve_fit``
    this is ``(popt, pcov)``), with parameters ordered
    ``center_lon, center_lat, scale, fwhm``.
    """
    radius = guess_fwhm*3.0
    nside = healpy.npix2nside(hit_map.size)
    mask = hit_map == 0
    mask |= pylab.isnan(signal_map)
    indices = _get_close_pixels(guess_center, radius, nside, mask)[0]
    # list() so that tuples concatenate and arrays are not added element-wise.
    p0 = list(guess_center) + [1.0, guess_fwhm]

    def _model(indices, *params):
        center_lon, center_lat, scale, fwhm = params
        thetas, lons = healpy.pix2ang(nside, indices)
        lats = pylab.pi/2.0 - thetas
        # Wrap the raw longitude difference into [-pi, pi) *before* scaling
        # by cos(lat).  Wrapping the already-scaled value leaves pixels on
        # the far side of the 0/2*pi seam with a large bogus offset whenever
        # cos(lat) < 1.
        dlons = (lons - center_lon + pylab.pi) % (2.0*pylab.pi) - pylab.pi
        dxs = dlons * pylab.cos(lats)
        dys = lats - center_lat
        return normal_2d(dxs, dys, scale, fwhm=fwhm)

    fit = curve_fit(_model, indices, signal_map[indices], p0=p0)
    return fit
Exemple #54
 def forward(self,xs):
     # Run the forward pass over the input sequence `xs` and return the
     # first `len(xs)` rows of `self.output`.
     # Raises RecognitionError when the sequence is longer than `N`.
     # NOTE(review): two statements below are truncated in this copy of the
     # source (`len(` and the `forward_py(` call); their missing arguments
     # must be restored from the upstream file before this can run.
     ni,ns,na                    = self.dims
     # Every input vector must have the input width `ni` taken from `self.dims`.
     assert len(xs[0])==ni
     n                           = len(xs)
     # Remember the sequence length on the instance.
     self.last_n                 = n
     # NOTE(review): argument of `len(` is missing; `N` is presumably the
     # maximum sequence length the preallocated buffers can hold -- confirm.
     N                           = len(
     if n>N: 
         raise RecognitionError("[i] Input too large for model")
     # NOTE(review): remaining arguments of `forward_py(` are missing.
     forward_py( n,N,ni,ns,na,xs,
     # Guard against NaNs in the computed outputs.
     assert not isnan(self.output[:n]).any()
     return self.output[:n]
Exemple #55
    def mu_age_p(logit_C0=logit_C0, i=rate["i"]["mu_age"], r=rate["r"]["mu_age"], f=rate["f"]["mu_age"]):
        """Return prevalence derived from the rates `i`, `r` and `f`.

        When every entry of `r` exceeds 5.99 the condition is treated as
        acute and prevalence is approximated directly as
        ``i / (r + m_all + f)``.  Otherwise `fun.forward` is evaluated on the
        stacked rates plus the two values derived from `logit_C0`, and
        prevalence is the "condition" part of its output divided by the sum
        of the "susceptible" and "condition" parts, with NaNs set to 0.
        """
        acute = r.min() > 5.99
        if acute:
            # Acute condition: running the ODE solver would be overkill, a
            # simple transformation of incidence is a good approximation.
            return i / (r + m_all + f)

        c0 = mc.invlogit(logit_C0)
        solver_input = pl.hstack((i, r, f, 1 - c0, c0))
        solution = fun.forward(0, solver_input)

        # The first N entries are the susceptible part, the rest the
        # with-condition part.
        with_condition = solution[N:]
        total = solution[:N] + with_condition

        prevalence = with_condition / total
        # 0/0 yields NaN; report zero prevalence there.
        prevalence[pl.isnan(prevalence)] = 0.0
        return prevalence
def ctc_align_targets(outputs,
    # NOTE(review): the signature is truncated in this copy of the source.
    # The body also reads `targets` and `lo`, which presumably were further
    # parameters -- restore them from the upstream file before use.
    #
    # Computes an alignment of `targets` against `outputs` by running
    # `forwardbackward` on the log of their match matrix, and returns the
    # aligned array with each row normalised to sum to 1.

    # Clamp outputs from below at `lo`, then renormalise each row to sum to 1.
    outputs = maximum(lo, outputs)
    outputs = outputs * 1.0 / sum(outputs, axis=1)[:, newaxis]
    # Match score of every output row against every target row.
    match = dot(outputs, targets.T)
    lmatch = log(match)
    assert not isnan(lmatch).any()
    both = forwardbackward(lmatch)
    # Subtract the maximum before exponentiating so the largest term is exp(0).
    epath = exp(both - amax(both))
    # Normalise each column of epath; zero column sums are replaced by 1e-9
    # to avoid dividing by zero.
    l = sum(epath, axis=0)[newaxis, :]
    epath /= where(l == 0.0, 1e-9, l)
    # Combine the targets using the path weights, clamped from below at `lo`.
    aligned = maximum(lo, dot(epath, targets))
    # Normalise each row of the result, with the same zero-sum protection.
    l = sum(aligned, axis=1)[:, newaxis]
    aligned /= where(l == 0.0, 1e-9, l)
    return aligned
def validate_complex_model(N_rep=20, simulation=good_complex_sim):
    # Run `simulation` N_rep times and, for each replicate, record where the
    # simulated values fall within the corresponding traces of the fitted
    # model.  The per-replicate rows are collected in a DataFrame, passed
    # through `validation_transform`, and the transformed result is returned.
    #
    # `simulation` must return a pair (d, m): `d` is indexed by variable name
    # to give the simulated values, and `m` is indexed by variable name to
    # give objects exposing `.stats()` and `.trace()`.
    #
    # NOTE(review): the "display results" statement near the end is truncated
    # in this copy of the source (the call that consumed the list and the
    # closing bracket are missing).
    q = pandas.DataFrame()
    for n in range(N_rep):
        # simulate data and fit model
        d, m = simulation()

        # tally posterior quantiles
        results = {}

        for var in 'eta_cross_eta eta delta_mu delta_beta beta gamma mu sigma'.split():
            for j, var_j in enumerate(d[var]):
                stats = m[var].stats()
                # Count of trace samples below the simulated value, divided
                # by stats['n'] (presumably the number of samples -- confirm).
                results['%s_%d'%(var, j)] = [(var_j > m[var].trace()[:,j]).sum() / float(stats['n'])]
        # add y_mis
        k = 0
        for j, n_j in enumerate(d['n']):
            for i in range(n_j):
                # Only entries of m['y'] that are NaN are tallied here.
                if pl.isnan(m['y'][j][i]):
                    # NOTE(review): `stats` is whatever the loop above left
                    # behind (the stats of the last variable processed).
                    results['y_mis_%d'%k] = [(d['y'][j][i] > m['y_pred'][j].trace()[:,i]).sum() / float(stats['n'])]
                    k += 1

        # One row per replicate, labelled q_rep_<n>.
        q = q.append(pandas.DataFrame(results, index=['q_rep_%d'%n]))

    results = validation_transform(q)

    # display results
    # NOTE(review): truncated -- the list below pairs a LaTeX label with the
    # matching result columns, but the enclosing call and the closing
    # bracket are missing.
        [[r'$y_{mis}$', results.filter(like='y_mis').columns],
         [r'$\eta\times\eta$', results.filter(like='eta_cross_eta').columns],
         [r'$\eta$', results.filter(regex='eta_\d').columns],
         [r'$\delta_\mu$', results.filter(like='delta_mu').columns],
         [r'$\delta_\beta$', results.filter(like='delta_beta').columns],
         [r'$\sigma$', results.filter(like='sigma').columns],
         [r'$\beta$', results.filter(regex='^beta').columns],
         [r'$\gamma$', results.filter(regex='gamma').columns],
         [r'$\mu$', results.filter(regex='^mu').columns],

    return results
Exemple #58
    def sample(self, model, evidence):
        # Draw a new value for `h` by configuring `self._kalman` and calling
        # `forward_filter_backward_sample` on the observations `z_h`.
        # Returns the sampled values reshaped to a 1-D array of length n.
        #
        # NOTE(review): several statements are truncated in this copy of the
        # source: the list comprehension below lacks its closing bracket and
        # most of the `array([` literals assigned to the Kalman filter are
        # cut off.  Restore them from the upstream file before use.
        z, T, g, h, sigma_h, phi = [
            evidence[var] for var in ['z', 'T', 'g', 'h', 'sigma_h', 'phi']
        sigma_z_h = model.known_params['sigma_z_h']
        mu_h = model.known_params['mu_h']
        prior_mu_h = model.hyper_params['prior_mu_h']
        prior_cov_h = model.hyper_params['prior_cov_h']
        n = len(h)

        # Work on (n, 1) column-vector copies so the evidence is not modified.
        g = g.copy().reshape((n, 1))
        h = h.copy().reshape((n, 1))
        z_h = ma.asarray(z.copy().reshape((n, 1)))
        # Entries with T == 0 or T == 1 are set to NaN (and masked below);
        # entries with T == 2 have `g` subtracted.
        if sum(T == 0) > 0:
            z_h[T == 0] = nan
        if sum(T == 1) > 0:
            z_h[T == 1] = nan
        if sum(T == 2) > 0:
            z_h[T == 2] -= g[T == 2]
        # Turn the NaN entries into masked entries of the masked array.
        z_h[isnan(z_h)] = ma.masked

        kalman = self._kalman
        # NOTE(review): the following `array([` literals are truncated.
        kalman.initial_state_mean = array([
        kalman.initial_state_covariance = array([
            prior_cov_h[0, 0],
        kalman.transition_matrices = array([
        kalman.transition_covariance = array([
        kalman.transition_offsets = mu_h * (1 - phi) * ones((n, 1))
        kalman.observation_matrices = eye(1)
        kalman.observation_covariance = array([
        sampled_h = forward_filter_backward_sample(kalman, z_h)

        return sampled_h.reshape((n, ))