def resample(data):
    if len(data) == 0:
        return data

    delta_true = .1
    p = data['mu_pred']+1.e-6


    # TODO: abstract this block of code into rate_model.py; it is also called in data_model.py
    ## ensure that all data has uncertainty quantified appropriately
    # first replace all missing se from ci
    missing_se = pl.isnan(data['standard_error']) | (data['standard_error'] <= 0)
    data['standard_error'][missing_se] = (data['upper_ci'][missing_se] - data['lower_ci'][missing_se]) / (2*1.96)

    # then replace all missing ess with se
    missing_ess = pl.isnan(data['effective_sample_size'])
    data['effective_sample_size'][missing_ess] = data['value'][missing_ess]*(1-data['value'][missing_ess])/data['standard_error'][missing_ess]**2

    # warn about data that doesn't have effective sample size quantified, or is negative; such rows fall back to ess=1
    missing_ess = pl.isnan(data['effective_sample_size']) | (data['effective_sample_size'] < 0)
    if sum(missing_ess) > 0:
        print 'WARNING: %d rows of data have invalid quantification of uncertainty.' % sum(missing_ess)
        data['effective_sample_size'][missing_ess] = 1.0

    n = data['effective_sample_size']

    data['true'] = p
    data['value'] = (1.0 * mc.rnegative_binomial(n*p, delta_true*n*p)) / n

    # uncomment below to test the effect of having very wrong data
    #data['value'] = 0.
    #data['effective_sample_size'] = 1.e6

    return data
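For reference, the two back-fill identities used above can be exercised on their own: a symmetric 95% interval has half-width 1.96*se, and a binomial proportion has se = sqrt(p*(1-p)/n), so n can be recovered from se. A minimal standalone sketch with made-up arrays (illustrative values only, not part of the model code):

import pylab as pl

value          = pl.array([0.05, 0.10])
lower_ci       = pl.array([0.03, pl.nan])
upper_ci       = pl.array([0.07, pl.nan])
standard_error = pl.array([pl.nan, 0.02])

# missing se recovered from a symmetric 95% interval: half-width / 1.96
missing_se = pl.isnan(standard_error)
standard_error[missing_se] = (upper_ci[missing_se] - lower_ci[missing_se]) / (2*1.96)

# missing ess recovered by inverting se = sqrt(p*(1-p)/n)
effective_sample_size = value*(1-value)/standard_error**2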
Example #2
    def process(self):
        """rearranges the ping data into a matrix of max amplitude of
        dimensions corrisponding to the power, gain and beam sections."""
        MINSAMPLES = 5
        datadim = self.pingdata.shape
        self.pingmax = pl.zeros((len(self.settings['power']), len(self.settings['gain']), datadim[2]))

        for i, power in enumerate(self.settings['power']):
            for j, gain in enumerate(self.settings['gain']):
                for k in xrange(datadim[2]):
                    sampleindx = pl.find((self.pingdata[:, 1, k]  == power) & (self.pingdata[:, 2, k] == gain))
                    if len(sampleindx) > MINSAMPLES:
                        temp = self.pingdata[sampleindx[-MINSAMPLES:], 0, k]
                        tempmax = temp.max()
                        if tempmax == 0:
                            self.pingmax[i, j, k] = pl.NaN
                        else:
                            self.pingmax[i, j, k] = tempmax
                    else:
                        self.pingmax[i, j, k] = pl.NaN

        #The following section removes settings that were collected erroneously.
        #power settings first
        null = pl.zeros((len(self.settings['gain']), datadim[2]))
        powershortlist = []
        self.havedata = True  # this is an ugly workaround...
        for i, power in enumerate(self.settings['power']):
            test = pl.isnan(self.pingmax[i, :, :] )
            if test.all():
                powershortlist.append(i)
                print 'removing ' + str(power) + ' power setting.'
        for i in powershortlist:
            try:
                self.settings['power'].pop(i)
            except IndexError:
                self.havedata = False
        if self.havedata:
            self.pingmax = pl.delete(self.pingmax, powershortlist, 0)
            #then gain settings
            null = pl.zeros((len(self.settings['power']), datadim[2]))
            gainshortlist = []
            for i, gain in enumerate(self.settings['gain']):
                test = pl.isnan(self.pingmax[:, i, :])
                if test.all():
                    gainshortlist.append(i)
                    print 'removing ' + str(gain) + ' gain setting.'
            for i in gainshortlist:
                try:
                    self.settings['gain'].pop(i)
                except IndexError:
                    self.havedata = False
            if self.havedata:
                self.pingmax = pl.delete(self.pingmax, gainshortlist, 1)
                #remove the power and gain to normalize
                self.pingmax = 20*pl.log10(self.pingmax)
                for i, power in enumerate(self.settings['power']):
                    for j, gain in enumerate(self.settings['gain']):
                        self.pingmax[i, j, :] = self.pingmax[i, j, :] - power - gain
def evaluate_model(mod, comment='', data_fname='missing_noisy_data.csv', truth_fname='data.csv'):
    """ Run specified model on existing data (data.csv / missing_noisy_data.csv) and save results in dev_log.csv
    Existing models: %s """ % data_run_models
    if mod not in data_run_models.split(' '):
        raise TypeError, 'Unrecognized model "%s"; must be one of %s' % (mod, data_run_models)

    import model
    reload(model)

    print 'loading data'
    data = pl.csv2rec(data_fname)
    truth = pl.csv2rec(truth_fname)
    
    t0 = time.time()
    print 'generating model'
    mod_mc = eval('model.%s(data)' % mod)

    print 'fitting model with mcmc'
    mod_mc.sample(10000, 5000, 50, verbose=1)
    t1 = time.time()

    print 'summarizing results'

    import graphics
    reload(graphics)
    pl.figure(figsize=(22, 17), dpi=300)
    pl.clf()
    graphics.plot_all_predictions_over_time(data, mod_mc.predicted, more_data=truth)

    data_stats = mod_mc.data_predicted.stats()
    i_out = [i for i in range(len(data)) if pl.isnan(data.y[i])]
    rmse_abs_out = pl.rms_flat(truth.y[i_out] - data_stats['mean'][i_out])
    rmse_rel_out = 100*pl.rms_flat(1. - data_stats['mean'][i_out]/truth.y[i_out])

    i_in = [i for i in range(len(data)) if not pl.isnan(data.y[i])]
    rmse_abs_in = pl.rms_flat(truth.y[i_in] - data_stats['mean'][i_in])
    rmse_rel_in = 100*pl.rms_flat(1. - data_stats['mean'][i_in]/truth.y[i_in])

    param_stats = mod_mc.param_predicted.stats()
    coverage = 100*pl.sum((truth.y[i_out] >= param_stats['95% HPD interval'][i_out, 0]) & (truth.y[i_out] <= param_stats['95% HPD interval'][i_out, 1])) / float(len(i_out))

    import md5
    data_hash = md5.md5(data).hexdigest()
    results = [mod, t1-t0, rmse_abs_out, rmse_rel_out, rmse_abs_in, rmse_rel_in, coverage,
               len(data), len(pl.unique(data.region)), len(pl.unique(data.country)), len(pl.unique(data.year)), len(pl.unique(data.age)), data_hash,
               t0, comment]
    print '%s: time: %.0fs out-of-samp rmse abs=%.1f rel=%.0f in-samp rmse abs=%.1f rel=%.0f coverage=%.0f\ndata: %d rows; %d regions, %d countries %d years %d ages [data hash: %s]\n(run conducted at %f)\n%s' % tuple(results)

    pl.savefig('/home/j/Project/Models/space-time-smoothing/images/%s.png' % t0)  # FIXME: don't hardcode path for saving images

    import csv
    f = open('dev_log.csv', 'a')
    f_csv = csv.writer(f)
    f_csv.writerow(results)
    f.close()

    return mod_mc
def create_uncertainty(model, rate_type):
    '''data without valid uncertainty is given the 10th-percentile effective sample size of the data set
    Parameters
    ----------
    model : data.ModelData
      dismod model
    rate_type : str
      a rate model
      'neg_binom', 'binom', 'normal', 'log_normal', 'poisson', 'beta'
    Results
    -------
    model : data.ModelData
      dismod model with measurements of uncertainty for all data
    '''
    # fill any missing covariate data with 0s
    for cv in list(model.input_data.filter(like='x_').columns):
        model.input_data[cv] = model.input_data[cv].fillna([0])
    
    # find indices with missing or negative standard error and fill them,
    # first from the CI half-width, then from the value and effective sample size
    missing_se = pl.isnan(model.input_data['standard_error']) | (model.input_data['standard_error'] < 0)
    if True in set(missing_se):
        model.input_data['standard_error'][missing_se] = (model.input_data['upper_ci'][missing_se] - model.input_data['lower_ci'][missing_se]) / (2*1.96)
        missing_se_still = pl.isnan(model.input_data['standard_error']) | (model.input_data['standard_error'] < 0)
        if True in set(missing_se_still):
            model.input_data['standard_error'][missing_se_still] = pl.sqrt(model.input_data['value'][missing_se_still]*(1-model.input_data['value'][missing_se_still])/model.input_data['effective_sample_size'][missing_se_still])

    # find indices that contain nan for effective sample size 
    missing_ess = pl.isnan(model.input_data['effective_sample_size'])==1  
    # calculate effective sample size from standard error
    model.input_data['effective_sample_size'][missing_ess] = model.input_data['value'][missing_ess]*(1-model.input_data['value'][missing_ess])/(model.input_data['standard_error'][missing_ess])**2
    
    # fill any remaining missing effective sample size with the 10th percentile of the data set
    non_missing_ess_still = pl.isnan(model.input_data['effective_sample_size'])==0 # finds all real numbers
    if False in non_missing_ess_still: 
        percent = pl.percentile(model.input_data['effective_sample_size'][non_missing_ess_still], 10.)
        missing_ess_still = pl.isnan(model.input_data['effective_sample_size'])==1 # finds all nan 
        # replace nan effective sample size with 10th percentile 
        model.input_data['effective_sample_size'][missing_ess_still] = percent
    
    # change values of 0 in lognormal model to 1 observation
    if rate_type == 'log_normal':
        # find indices where values are 0
        zero_val = (model.input_data['value'] == 0)
        # add 1 observation so no values are zero, also change effective sample size
        model.input_data['effective_sample_size'][zero_val] = model.input_data['effective_sample_size'][zero_val] + 1
        model.input_data['value'][zero_val] = 1.0/model.input_data['effective_sample_size'][zero_val]
        # update standard error
        model.input_data['standard_error'][zero_val] = pl.sqrt(model.input_data['value'][zero_val]*(1-model.input_data['value'][zero_val])/model.input_data['effective_sample_size'][zero_val])    
    
    return model
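The percentile fall-back at the end of create_uncertainty can be checked in isolation; a minimal sketch with a made-up effective sample size column (values are illustrative only):

import pylab as pl

ess = pl.array([100., 2500., pl.nan, 40., pl.nan, 900.])
observed = ess[~pl.isnan(ess)]
ess[pl.isnan(ess)] = pl.percentile(observed, 10.)  # rows without ess get the 10th percentile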
Example #5
def test_from_gbd_json():
    d = data.ModelData.from_gbd_json('tests/dismoditis.json')

    assert len(d.input_data) > 17, 'dismoditis model has more than 17 data points'
    for field in 'data_type value area sex age_start age_end year_start year_end standard_error effective_sample_size lower_ci upper_ci age_weights'.split():
        assert field in d.input_data.columns, 'Input data CSV should have field "%s"' % field
    #assert len(d.input_data.filter(regex='x_').columns) == 1, 'should have added country-level covariates to input data'
    #assert len(d.input_data['x_LDI_id_Updated_7July2011'].dropna().index) > 0

    assert len(d.output_template) > 100
    for field in 'area sex year pop'.split():
        assert field in d.output_template.columns, 'Output template CSV should have field "%s"' % field
    #assert len(d.output_template.filter(regex='x_').columns) == 1, 'should have added country-level covariates to output template'
    #assert len(d.output_template['x_LDI_id_Updated_7July2011'].dropna().index) > 0

    for data_type in 'i p r f rr X'.split():
        for prior in 'smoothness heterogeneity level_value level_bounds increasing decreasing'.split():
            assert prior in d.parameters[data_type], 'Parameters for %s should include prior on %s' % (data_type, prior)

    assert 'CHN' in d.hierarchy.successors('asia_east')
    assert pl.isnan(d.hierarchy['asia_east']['CHN'].get('weight'))
    #assert set(d.hierarchy.node['asia_east'].keys()) == set('area sex year_start year_end pop'.split())
    #assert len(d.nodes_to_fit) == 21*3*2 + 1

    import dismod3
    import simplejson as json
    model = data.ModelData.from_gbd_jsons(json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    def sample(self, model, evidence):
        z = evidence['z']
        T, g, h, sigma_g = [evidence[var] for var in ['T', 'g', 'h', 'sigma_g']]
        sigma_z_g = model.known_params['sigma_z_g']
        sigma_z_h = model.known_params['sigma_z_h']
        prior_mu_g, prior_cov_g = [model.hyper_params[var] for var in ['prior_mu_g', 'prior_cov_g']]
        n = len(g)

        # Must be a more concise way to deal with scalar vs vector
        g = g.copy().reshape((n,1))
        h = h.copy().reshape((n,1))
        z_g = ma.asarray(z.copy().reshape((n,1)))
        obs_cov = sigma_z_g**2*ones((n,1,1))
        if sum(T == 0) > 0:
            z_g[T == 0] = nan
        if sum(T == 2) > 0:
            z_g[T == 2] -= h[T == 2]
            obs_cov[T == 2] = sigma_z_h**2
        z_g[isnan(z_g)] = ma.masked

        kalman = self._kalman
        kalman.initial_state_mean = array([prior_mu_g[0],])
        kalman.initial_state_covariance = array([prior_cov_g[0,0],])
        kalman.transition_matrices = eye(1)
        kalman.transition_covariance = array([sigma_g**2,])
        kalman.observation_matrices = eye(1)
        kalman.observation_covariance = obs_cov
        sampled_g = forward_filter_backward_sample(kalman, z_g)

        return sampled_g.reshape((n,))
    def sample(self, model, evidence):
        z, T, g, h, sigma_h, phi  = [evidence[var] for var in ['z', 'T', 'g', 'h', 'sigma_h', 'phi']]
        sigma_z_h = model.known_params['sigma_z_h']
        mu_h = model.known_params['mu_h']
        prior_mu_h = model.hyper_params['prior_mu_h']
        prior_cov_h = model.hyper_params['prior_cov_h']
        n = len(h)

        g = g.copy().reshape((n,1))
        h = h.copy().reshape((n,1))
        z_h = ma.asarray(z.copy().reshape((n,1)))
        if sum(T == 0) > 0:
            z_h[T == 0] = nan
        if sum(T == 1) > 0:
            z_h[T == 1] = nan
        if sum(T == 2) > 0:
            z_h[T == 2] -= g[T == 2]
        z_h[isnan(z_h)] = ma.masked

        kalman = self._kalman
        kalman.initial_state_mean = array([prior_mu_h[0],])
        kalman.initial_state_covariance = array([prior_cov_h[0,0],])
        kalman.transition_matrices = array([phi,])
        kalman.transition_covariance = array([sigma_h**2,])
        kalman.transition_offsets = mu_h*(1-phi)*ones((n, 1))
        kalman.observation_matrices = eye(1)
        kalman.observation_covariance = array([sigma_z_h**2,])
        sampled_h = forward_filter_backward_sample(kalman, z_h)

        return sampled_h.reshape((n,))
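The Kalman configuration above encodes a mean-reverting AR(1) state equation, h_t = mu_h*(1-phi) + phi*h_{t-1} + N(0, sigma_h**2), observed through z with noise sigma_z_h. A minimal forward simulation of just that transition (parameter values are illustrative, not taken from any model here):

from numpy import zeros
from numpy.random import randn

mu_h, phi, sigma_h = 0.0, 0.9, 0.2
n = 200

h = zeros(n)
for t in range(1, n):
    h[t] = mu_h*(1 - phi) + phi*h[t - 1] + sigma_h*randn()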
Example #8
def find_unfeasible_concentrations(S, dG0_f, c_range, c_mid=1e-4, T=default_T, bounds=None, log_stream=None):
    """ 
        Almost the same as find_pCr, but adds a global restriction on the concentrations (for compounds
        that don't have specific bounds in 'bounds').
        After the solution which optimizes the pCr is found, any concentration which does not conform
        to the limits of c_range will be truncated to the closest allowed concentration.
        If at least one concentration needs to be adjusted, then pCr loses its meaning
        and therefore is returned with the value None.
    """
    dG_f, concentrations, pCr = find_pCr(S, dG0_f, c_mid=c_mid, bounds=bounds, log_stream=log_stream)

    for c in xrange(dG0_f.shape[0]):
        if (pylab.isnan(dG0_f[c, 0])):
            continue # unknown dG0_f - therefore the concentration of this compounds is meaningless

        if ((bounds == None or bounds[c][0] == None) and concentrations[c, 0] < c_range[0]):
            concentrations[c, 0] = c_range[0]
            dG_f[c, 0] = dG0_f[c, 0] + R * T * pylab.log(c_range[0])
            pCr = None
        elif ((bounds == None or bounds[c][1] == None) and concentrations[c, 0] > c_range[1]):
            concentrations[c, 0] = c_range[1]
            dG_f[c, 0] = dG0_f[c, 0] + R * T * pylab.log(c_range[1])
            pCr = None

    return (dG_f, concentrations, pCr)
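The truncation step applies dG_f = dG0_f + R*T*ln(c) after clamping each concentration into c_range; the same logic in isolation, with placeholder constants rather than the module's R and default_T:

import pylab

R, T = 8.31e-3, 298.15              # kJ/(mol K) and K -- placeholder values
c_range = (1e-6, 1e-2)
dG0_f = pylab.array([-100.0, -250.0])
conc  = pylab.array([1e-8, 5e-4])   # the first concentration lies below c_range

clamped = pylab.clip(conc, c_range[0], c_range[1])
dG_f = dG0_f + R*T*pylab.log(clamped)
pCr_is_meaningless = bool((clamped != conc).any())  # pCr is returned as None in this case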
Example #9
def add_thermodynamic_constraints(cpl, dG0_f, c_range=(1e-6, 1e-2), T=default_T, bounds=None):   
    """
        For any compound that does not have an explicit bound set by the 'bounds' argument,
        create a bound using the 'margin' variables (the last two columns of A).
    """
    
    Nc = dG0_f.shape[0]

    if bounds != None and len(bounds) != Nc:
        raise Exception("The concentration bounds list must be the same length as the number of compounds")
    if bounds == None:
        bounds = [(None, None)] * Nc
    
    for c in xrange(Nc):
        if pylab.isnan(dG0_f[c, 0]):
            continue # unknown dG0_f - cannot bound this compound's concentration at all

        b_low = bounds[c][0] or c_range[0]
        b_high = bounds[c][1] or c_range[1]

        # lower bound: dG0_f + R*T*ln(Cmin) <= x_i
        cpl.variables.set_lower_bounds('c%d' % c, dG0_f[c, 0] + R*T*pylab.log(b_low))

        # upper bound: x_i <= dG0_f + R*T*ln(Cmax)
        cpl.variables.set_upper_bounds('c%d' % c, dG0_f[c, 0] + R*T*pylab.log(b_high))
Example #10
 def metric_heat(group):
     if all(pylab.isnan(group[metric])):
         #print metric
         #describe_group(group)
         pass
        
     return groupfunc(group[metric])
Example #11
def getAngle(t1,c1,t2,c2):
    ''' 
    Get angle between two celestials at t1 and t2 
    
    Verify if ignoring the k-coordinate makes any sense
    
    timeit 240 microseconds
    '''
    if type(t2) == numpy.ndarray:
        t2 = t2[0]
    elif isnan(t2):
        print "ERROR, t2 is nan!"
        return t2
    
    p1 = c1.eph(t1)[0]
    p1[2] = 0.0
    p1l = norm(p1)
    p1 /= p1l
    
    p2 = c2.eph(t2)[0]
    p2[2] = 0.0
    p2l = norm(p2)
    p2 /= p2l
    
    #if p1l > p2l:
    return p1.dot(p2)
    #else:
    #    return p1.dot(p2)
    
Example #12
def load_new_model(disease, country='all', sex=['total', 'male', 'female'], cov='no'):
    '''create disease model with relevant data
    cov : str
      method to handle covariates
      default is nothing ('no')
      options include, 
        - 'drop' : drop all covariates
        - 'zero' : missing values replaced with 0
        - 'average' : missing values replaced with average of column
    '''
    model = dismod3.data.load('/home/j/Project/dismod/output/dm-%s'%disease)
    # keep relative data
    if (type(sex)==str) & (sex != 'total'): model.keep(areas=[country], sexes=[sex, 'total'])
    else: model.keep(areas=[country], sexes=sex)
    
    if (True in pl.isnan(pl.array(model.output_template.filter(like='x_')))) | (True in pl.isnan(pl.array(model.input_data.filter(like='x_')))): 
        print 'Covariates missing, %s method used'%(cov)
        col = model.input_data.filter(like='x_').columns
        for i in col:
            if cov == 'drop': 
                model.input_data = model.input_data.drop(i,1)
                model.output_template = model.output_template.drop(i,1)
            elif cov == 'zero': 
                model.input_data[i] = model.input_data[i].fillna([0])
                model.output_template[i] = model.output_template[i].fillna([0])
            elif cov == 'average': 
                model.input_data[i] = model.input_data[i].fillna([model.input_data[i].mean()])
                model.output_template[i] = model.output_template[i].fillna(model.output_template[i].mean())
    
    return model
Example #13
def get_cod_data_all_causes(iso3='USA', age_group='1_4', sex='F'):
    """ TODO: write doc string for this function"""
    print 'loading', iso3, age_group, sex
    import glob
    
    cause_list = []
    fpath = '/home/j/Project/Causes of Death/Under Five Deaths/CoD Correct Input Data/v02_prep_%s/%s+*+%s+%s.csv' % (iso3, iso3, age_group, sex)
    #fpath = '/home/j/Project/GBD/dalynator/data/cod_correct_input_pos/run_9_cause_*.csv'  # use Mike's validation data
    fnames = glob.glob(fpath)

    # initialize input distribution array
    N = 990  # TODO: get this from the data files
    T = 32  # TODO: get this from the data files
    J = len(fnames)
    F = pl.zeros((N, T, J))

    # fill input distribution array with data from files
    for j, fname in enumerate(sorted(fnames)):
        cause = fname.split('+')[1]  # TODO: make this less brittle and clearer
        #cause = str(j) # use Mike's validation data causes
        print 'loading cause', cause
        F_j = pl.csv2rec(fname)

        for n in range(N):
            F[n, :, j] = F_j['ensemble_d%d'%(n+1)]/F_j['envelope']
            #F[n, :, j] = F_j['d%d'%(n+1)]/F_j['envelope'] # use Mike's validation data

        assert not pl.any(pl.isnan(F)), '%s should have no missing values' % fname
        cause_list.append(cause)
    
    print 'loading complete'
    return F, cause_list
    def sample(self, model, evidence):
        z = evidence['z']
        T, surfaces, sigma_g, sigma_h = [evidence[var] for var in ['T', 'surfaces', 'sigma_g', 'sigma_h']]
        mu_h, phi, sigma_z_g, sigma_z_h = [model.known_params[var] for var in ['mu_h', 'phi', 'sigma_z_g', 'sigma_z_h']]
        prior_mu_g, prior_cov_g = [model.hyper_params[var] for var in ['prior_mu_g', 'prior_cov_g']]
        prior_mu_h, prior_cov_h = [model.hyper_params[var] for var in ['prior_mu_h', 'prior_cov_h']]
        n = len(z)  # number of time steps (z and T have one entry per step)

        y = ma.asarray(ones((n, 2))*nan)
        if sum(T==1) > 0:
            y[T==1, 0] = z[T==1]
        if sum(T==2) > 0:
            y[T==2, 1] = z[T==2]
        y[isnan(y)] = ma.masked

        kalman = self._kalman
        kalman.initial_state_mean=[prior_mu_g[0], prior_mu_h[0]]
        kalman.initial_state_covariance=diag([prior_cov_g[0,0], prior_cov_h[0,0]])
        kalman.transition_matrices=[[1, 0], [0, phi]]
        kalman.transition_offsets =ones((n, 2))*[0, mu_h*(1-phi)]
        kalman.transition_covariance=[[sigma_g**2, 0], [0, sigma_h**2]]
        kalman.observation_matrices=[[1, 0], [1, 1]]
        kalman.observation_covariance=[[sigma_z_g**2, 0], [0, sigma_z_h**2]]
        sampled_surfaces = forward_filter_backward_sample(kalman, y)

        return sampled_surfaces
Example #15
def setup(dm, key, data_list, rate_stoch):
    """ Generate the PyMC variables for a log-normal model of
    a function of age

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
      
    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)

    data_list : list of data dicts
      the observed data to use in the likelihood function

    rate_stoch : pymc.Stochastic
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimate_age_mesh()).

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      log-normal model.  vars['rate_stoch'] is of particular
      relevance, for details see the beta_binomial_model
    """
    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    vars['rate_stoch'] = rate_stoch

    # set up priors and observed data
    prior_str = dm.get_priors(key)
    dismod3.utils.generate_prior_potentials(vars, prior_str, est_mesh)

    vars['observed_rates'] = []
    for d in data_list:
        age_indices = dismod3.utils.indices_for_range(est_mesh, d['age_start'], d['age_end'])
        age_weights = d.get('age_weights', pl.ones(len(age_indices)) / len(age_indices))

        lb, ub = dm.bounds_per_1(d)
        se = (pl.log(ub) - pl.log(lb)) / (2. * 1.96)
        if pl.isnan(se) or se <= 0.:
            se = 1.

        print 'data %d: log(value) = %f, se = %f' % (d['id'], pl.log(dm.value_per_1(d)), se)
        
        @mc.observed
        @mc.stochastic(name='obs_%d' % d['id'])
        def obs(f=vars['rate_stoch'],
                age_indices=age_indices,
                age_weights=age_weights,
                value=pl.log(dm.value_per_1(d)),
                tau=se**-2, data=d):
            f_i = dismod3.utils.rate_for_range(f, age_indices, age_weights)
            return mc.normal_like(value, pl.log(f_i), tau)
        vars['observed_rates'].append(obs)
        
    return vars
Example #16
    def _get_angles(steps,track_length):
        angles = pl.zeros(track_length-2)
        polar = pl.zeros(pl.shape(steps))
        for i in range(track_length-1):
            polar[i,0] = pl.norm(steps[i,:])
            polar[i,1] = pl.arctan(steps[i,0]/steps[i,1])

            if pl.isnan( polar[i,1]):
                polar[i,1] = 0

            if (steps[i,0] >= 0):
                if (steps[i,1] >= 0):
                    pass
                elif (steps[i,1] < 0):
                    polar[i,1] += 2.*pl.pi
            elif (steps[i,0] < 0):
                if (steps[i,1] >= 0):
                    polar[i,1] += pl.pi
                elif (steps[i,1] < 0):
                    polar[i,1] += pl.pi

        for i in range(track_length-2):
            angles[i] = polar[i+1,1] - polar[i,1]

        return angles
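A more compact formulation of the same turning-angle computation, sketched with pl.arctan2 so that the manual quadrant corrections above are unnecessary; this is an alternative, not the project's helper, and its angles agree with the original only up to multiples of 2*pi:

import pylab as pl

def _get_angles_arctan2(steps, track_length):
    # heading of each step, measured from the +y axis like arctan(x/y) above
    headings = pl.arctan2(steps[:track_length-1, 0], steps[:track_length-1, 1])
    return pl.diff(headings)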
Example #17
def make_pCr_problem(S, dG0_f,
                     c_mid=1e-3,
                     ratio=3.0,
                     T=default_T,
                     bounds=None,
                     log_stream=None):
    """Creates a Cplex problem for finding the pCr.
    
    Simply sets up all the constraints. Does not set the objective.
    
    Args:
        S: stoichiometric matrix.
        dG0_f: deltaG0'-formation values for all compounds (in kJ/mol) (1 x compounds)
        c_mid: the default concentration to center the pCr on.
        ratio: the ratio between the distance of the upper bound from c_mid
            and the lower bound from c_mid (in logarithmic scale)
        bounds: the concentration bounds for metabolites.
        log_stream: where to write Cplex logs to.
    
    Returns:
        A cplex.Cplex object for the problem.
    """
    Nc = S.shape[1]
    if Nc != dG0_f.shape[0]:
        raise Exception("The S matrix has %d columns, while the dG0_f vector has %d" % (Nc, dG0_f.shape[0]))
    if bounds and len(bounds) != Nc:
        raise Exception("The concentration bounds list must be the same length as the number of compounds")

    cpl = create_cplex(S, dG0_f, log_stream)
    
    # Add pC variable.
    cpl.variables.add(names=['pC'], lb=[0], ub=[1e6])
    
    # Add variables for concentration bounds for each metabolite.
    for c in xrange(Nc):
        if pylab.isnan(dG0_f[c, 0]):
            continue # unknown dG0_f - cannot bound this compound's concentration at all

        # dG at the center concentration.
        dG_f_mid = dG0_f[c, 0] + R*T*pylab.log(c_mid)
        if bounds == None or bounds[c][0] == None:
            # lower bound: x_i + r/(1+r) * R*T*ln(10)*pC >= dG0_f + R*T*ln(Cmid) 
            cpl.linear_constraints.add(senses='G', names=['c%d_lower' % c], rhs=[dG_f_mid])
            cpl.linear_constraints.set_coefficients('c%d_lower' % c, 'c%d' % c, 1)
            cpl.linear_constraints.set_coefficients('c%d_lower' % c, 'pC', R*T*pylab.log(10) * ratio / (ratio + 1.0))
        else:
            # this compound has a specific lower bound on its activity
            cpl.variables.set_lower_bounds('c%d' % c, dG0_f[c, 0] + R*T*pylab.log(bounds[c][0]))

        if bounds == None or bounds[c][1] == None:
            # upper bound: x_i - 1/(1+r) * R*T*ln(10)*pC <= dG0_f + R*T*ln(Cmid)
            cpl.linear_constraints.add(senses='L', names=['c%d_upper' % c], rhs=[dG_f_mid])
            cpl.linear_constraints.set_coefficients('c%d_upper' % c, 'c%d' % c, 1)
            cpl.linear_constraints.set_coefficients('c%d_upper' % c, 'pC', -R*T*pylab.log(10) / (ratio + 1.0))
        else:
            # this compound has a specific upper bound on its activity
            cpl.variables.set_upper_bounds('c%d' % c, dG0_f[c, 0] + R*T*pylab.log(bounds[c][1]))

    return cpl
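Solving the two pC constraints for the concentration shows the window they imply around c_mid: roughly c_mid*10**(-pC*ratio/(1+ratio)) up to c_mid*10**(pC/(1+ratio)), widening as pC grows. A small numeric illustration of that algebra (plain Python, no Cplex, values are arbitrary):

c_mid, ratio, pC = 1e-3, 3.0, 2.0
c_min = c_mid * 10**(-pC*ratio/(1.0 + ratio))
c_max = c_mid * 10**( pC/(1.0 + ratio))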
Example #18
 def clean_estpoints(self):
     """Removes NaN from the estpoints that result from cleaning by the user
     in the extractfitpoints method."""
     temp = self.estpoints.tolist()
     indx = 0
     while indx < len(temp):
         if pl.isnan(temp[indx][1]):
             temp.pop(indx)
         else:
             indx+=1
     self.estpoints = pl.array(temp)
def apply_mask(x):
    """
    Gets arrays with NaN from MAT files and applies python masked_where
    """
    f = pl.find(pl.isnan(x) == 1)
    l1, l2 = x.shape 
    x = pl.ravel(x)
    x[f] = 0
    x.shape = (l1,l2)
    x = pl.ma.masked_where(x == 0, x)
    return x
Example #20
def maskOD(data):
    '''Mask too large/small values for plots'''
    for c, wDict in data.items():
        for w, curve in wDict.items():
            curve[(curve > 2) | (curve < 0.01)] = None
            # TODO: Report masks when they occur
            if py.isnan(py.sum(curve)):
                msg = ('Masking value with "nan" in '
                       '{} -- {}'.format(c, w))
                print(msg, file=sys.stderr)
    return data
Example #21
def renormalize(x_unpurt,x_puturb,epsilon):
    final_dist = distance(x_unpurt,x_puturb)
    xnew = pl.array([0.0,0.0,0.0,1.0])
    # the new renormalized vx (see lab book #2 pg 61)
    xnew[0] = x_unpurt[0]+(epsilon/final_dist)*(x_puturb[0]-x_unpurt[0])
    xnew[2] = x_unpurt[2]+(epsilon/final_dist)*(x_puturb[2]-x_unpurt[2])

    if pl.isnan(xnew[0]):
        print('RENORMALIZED PARALLEL VECTORS !!! FIX THIS!!!!')
        sys.exit()

    return xnew
Example #22
def loadFile(objectFileName):
    oimg = pyfits.open(objectFileName)

    # Load the IFU data -- Row-stacked spectra
    odata = oimg[1].data
    oError = oimg[2].data
    odata_dim = odata.shape
    wcs = astWCS.WCS(objectFileName, extensionName=1)
    owavelengthStartEnd = wcs.getImageMinMaxWCSCoords()[0:2]
    fiberNumber = wcs.getImageMinMaxWCSCoords()[2:4]
    owavelengthStep = oimg[1].header['CDELT1']

    owavelengthRange = [owavelengthStartEnd[0] + i * owavelengthStep
                        for i in range(odata_dim[1])]

    # Check to make sure we got it right
    if not owavelengthRange[-1] == owavelengthStartEnd[-1]:
        print 'The ending wavelengths do not match... Exiting'
        sys.exit(1)
    else:
        # make median sky
        specs = pyl.array([flux for flux in odata])
        skySpec = pyl.median(specs, axis=0)

    RSS = []
    for i in range(int(fiberNumber[1])):
        #oflux = odata[i] - oskyflux
        oflux = odata[i] - skySpec
        oflux[pyl.isnan(oflux)] = 0.0
        oErrorFlux = oError[i]
        #oflux = odata[i]

        # Mask out extreme values in spectrum
        # Just because edges dodgy in efosc
        med = pyl.median(oflux)
        oflux[pyl.greater(abs(oflux), 10.0 * med)] = 0.0001

        objSED = astSED.SED(wavelength=owavelengthRange, flux=oflux)
        #skySED = astSED.SED(wavelength=owavelengthRange, flux=oskyflux)
        skySED = astSED.SED(wavelength=owavelengthRange, flux=skySpec)
        errSED = astSED.SED(wavelength=owavelengthRange, flux=oErrorFlux)

        #  make it > 0 everywhere
        objSED.flux = objSED.flux - objSED.flux.min()
        objSED.flux = objSED.flux / objSED.flux.max()
        errSED.flux = errSED.flux - errSED.flux.min()
        errSED.flux = errSED.flux / errSED.flux.max()
        skySED.flux = skySED.flux - skySED.flux.min()
        skySED.flux = skySED.flux / skySED.flux.max()

        RSS.append({'object': objSED, 'sky': skySED, 'error': errSED})

    return RSS
Example #23
  def identify_nans(self, data, fn):
    """ 
    private method to identify rows and columns of all nans from grids. This 
    happens when the data from multiple GIS databases don't quite align on 
    whatever the desired grid is.
    """
    #print "::: DataInput identifying NaNs for %s :::" % fn

    good_x = ~all(isnan(data), axis=0) & self.good_x  # good cols
    good_y = ~all(isnan(data), axis=1) & self.good_y  # good rows
    
    if any(good_x != self.good_x):
      total_nan_x = sum(good_x == False)
      self.rem_nans = True
      print "Warning: %d row(s) of \"%s\" are entirely NaN." % (total_nan_x, fn)

    if any(good_y != self.good_y):
      total_nan_y = sum(good_y == False)
      self.rem_nans = True
      print "Warning: %d col(s) of \"%s\" are entirely NaN." % (total_nan_y, fn)
    
    self.good_x = good_x
    self.good_y = good_y
Example #24
def crunchZfile(f,aCol,sCol,bCol,normFactor):
    '''
    Takes a zAveraged... data file generated from the crunchData
    function of this library and produces the arithmetic mean
    as well as the standard error from all seeds.  The error
    is done through the propagation of errors as:
    e = sqrt{ \sum_k (c_k e_k)^2 } where e_k are the individual
    seed's standard errors and c_k are the weighting coefficients
    obeying \sum_k c_k = 1.
    '''
    avgs,stds,bins = pl.genfromtxt(f, usecols=(aCol, sCol, bCol),
            unpack=True, delimiter=',')

    # get rid of any items which are not numbers..
    # this is some beautiful Python juju.
    bins = bins[pl.logical_not(pl.isnan(bins))]
    stds = stds[pl.logical_not(pl.isnan(stds))]
    avgs = avgs[pl.logical_not(pl.isnan(avgs))]

    # normalize data.
    stds *= normFactor
    avgs *= normFactor

    weights = bins/pl.sum(bins)

    avgs *= weights
    stds *= weights  # over-estimates error bars

    stds *= stds

    avg = pl.sum(avgs)
    stdErr = pl.sum(stds)

    stdErr = stdErr**0.5

    return avg, stdErr
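The docstring's propagation-of-errors formula can be verified on its own; a minimal sketch with made-up per-seed averages, standard errors, and bin counts:

import pylab as pl

avgs = pl.array([1.02, 0.98, 1.01])   # per-seed means
stds = pl.array([0.05, 0.04, 0.06])   # per-seed standard errors e_k
bins = pl.array([100., 150., 250.])   # per-seed weights before normalization

weights = bins/pl.sum(bins)           # c_k, with sum_k c_k = 1
avg = pl.sum(weights*avgs)
stdErr = pl.sqrt(pl.sum((weights*stds)**2))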
Example #25
 def from_footstep_msg(goal_msg):
     rpy = quat2rpy(
         [goal_msg.pos.rotation.w, goal_msg.pos.rotation.x, goal_msg.pos.rotation.y, goal_msg.pos.rotation.z]
     )
     goal = FootGoal(
         pos=pl.hstack([goal_msg.pos.translation.x, goal_msg.pos.translation.y, goal_msg.pos.translation.z, rpy]),
         step_speed=goal_msg.params.step_speed,
         step_height=goal_msg.params.step_height,
         step_id=goal_msg.id,
         pos_fixed=[
             goal_msg.fixed_x,
             goal_msg.fixed_y,
             goal_msg.fixed_z,
             goal_msg.fixed_roll,
             goal_msg.fixed_pitch,
             goal_msg.fixed_yaw,
         ],
         is_right_foot=goal_msg.is_right_foot,
         is_in_contact=goal_msg.is_in_contact,
         bdi_step_duration=goal_msg.params.bdi_step_duration,
         bdi_sway_duration=goal_msg.params.bdi_sway_duration,
         bdi_lift_height=goal_msg.params.bdi_lift_height,
         bdi_toe_off=goal_msg.params.bdi_toe_off,
         bdi_knee_nominal=goal_msg.params.bdi_knee_nominal,
         bdi_max_body_accel=goal_msg.params.bdi_max_body_accel,
         bdi_max_foot_vel=goal_msg.params.bdi_max_foot_vel,
         bdi_sway_end_dist=goal_msg.params.bdi_sway_end_dist,
         bdi_step_end_dist=goal_msg.params.bdi_step_end_dist,
         support_contact_groups=goal_msg.params.support_contact_groups,
         terrain_pts=pl.vstack([goal_msg.terrain_path_dist, goal_msg.terrain_height]),
     )
     if any(pl.isnan(goal.pos[[0, 1, 5]])):
         raise ValueError("I don't know how to handle NaN in x, y, or yaw")
     else:
         goal.pos[pl.find(pl.isnan(goal.pos))] = 0
     return goal
Example #26
  def one_ci(v, ci, bootstraps):
    v = pylab.array(v)
    v = pylab.ma.masked_array(v,pylab.isnan(v)).compressed()
    if v.size == 0:
      return pylab.nan, 0, 0 #Nothing to compute

    r = pylab.randint(v.size, size=(v.size, bootstraps))
    booted_samp = pylab.array([pylab.median(v[r[:,n]]) for n in xrange(bootstraps)])
    booted_samp.sort()

    med = pylab.median(booted_samp)
    idx_lo = int(bootstraps * ci/2.0)
    idx_hi = int(bootstraps * (1.0-ci/2))

    return med, med-booted_samp[idx_lo], booted_samp[idx_hi]-med
Example #27
File: model.py Project: flaxter/gbd
def fe(data):
    """ Fixed Effect model::
    
        Y_r,c,t = beta * X_r,c,t + e_r,c,t
        e_r,c,t ~ N(0, sigma^2)
    """
    # covariates
    K1 = count_covariates(data, 'x')
    X = pl.array([data['x%d'%i] for i in range(K1)])

    K2 = count_covariates(data, 'w')
    W = pl.array([data['w%d'%i] for i in range(K2)])

    # priors
    beta = mc.Uninformative('beta', value=pl.zeros(K1))
    gamma = mc.Uninformative('gamma', value=pl.zeros(K2))
    sigma_e = mc.Uniform('sigma_e', lower=0, upper=1000, value=1)
    
    # predictions
    @mc.deterministic
    def mu(X=X, beta=beta):
        return pl.dot(beta, X)
    param_predicted = mu
    @mc.deterministic
    def sigma_explained(W=W, gamma=gamma):
        """ sigma_explained_i,r,c,t,a = gamma * W_i,r,c,t,a"""
        return pl.dot(gamma, W)

    @mc.deterministic
    def predicted(mu=mu, sigma_explained=sigma_explained, sigma_e=sigma_e):
        return mc.rnormal(mu, 1 / (sigma_explained**2. + sigma_e**2.))

    # likelihood
    i_obs = pl.find(1 - pl.isnan(data.y))
    @mc.observed
    def obs(value=data.y, i_obs=i_obs, mu=mu, sigma_explained=sigma_explained, sigma_e=sigma_e):
        return mc.normal_like(value[i_obs], mu[i_obs], 1. / (sigma_explained[i_obs]**2. + sigma_e**2.))

    # set up MCMC step methods
    mod_mc = mc.MCMC(vars())
    mod_mc.use_step_method(mc.AdaptiveMetropolis, mod_mc.beta)

    # find good initial conditions with MAP approx
    print 'attempting to maximize likelihood'
    var_list = [mod_mc.beta, mod_mc.obs, mod_mc.sigma_e]
    mc.MAP(var_list).fit(method='fmin_powell', verbose=1)

    return mod_mc
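To make explicit what the fe likelihood assumes, here is a minimal forward simulation of the docstring's model, Y = beta * X + e with e ~ N(0, sigma^2); the sizes and parameter values are illustrative only:

import pylab as pl

K1, n = 3, 500
beta_true = pl.array([1.0, -2.0, 0.5])
sigma_true = 0.1

X = pl.randn(K1, n)                                # one row per covariate
y = pl.dot(beta_true, X) + sigma_true*pl.randn(n)  # Y = beta*X + e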
Example #28
    def forward(self, xs):
        """Perform forward propagation of activations and update the
        internal state for a subsequent call to `backward`.
        Since this performs sequence classification, `xs` is a 2D
        array, with rows representing input vectors at each time step.
        Returns a 2D array whose rows represent output vectors for
        each input vector."""
        ni, ns, na = self.dims
        assert len(xs[0]) == ni
        n = len(xs)
        # self.last_n = n
        N = len(self.gi)
        if n > N:
            raise ocrolib.RecognitionError("input too large for LSTM model")
        self.reset(n)

        # Both functions are a straightforward implementation of the
        # LSTM equations. It is possible to abstract this further and
        # represent gates and memory cells as individual data structures.
        # However, that is several times slower and the extra abstraction
        # isn't actually all that useful.

        """Perform forward propagation of activations for a simple LSTM layer."""
        for t in range(n):
            prev = zeros(ns) if t == 0 else self.output[t - 1]
            self.source[t, 0] = 1
            self.source[t, 1 : 1 + ni] = xs[t]
            self.source[t, 1 + ni :] = prev
            self.gix[t] = dot(self.WGI, self.source[t])
            self.gfx[t] = dot(self.WGF, self.source[t])
            self.gox[t] = dot(self.WGO, self.source[t])
            self.cix[t] = dot(self.WCI, self.source[t])
            if t > 0:
                self.gix[t] += self.WIP * self.state[t - 1]
                self.gfx[t] += self.WFP * self.state[t - 1]
            self.gi[t] = ffunc(self.gix[t])
            self.gf[t] = ffunc(self.gfx[t])
            self.ci[t] = gfunc(self.cix[t])
            self.state[t] = self.ci[t] * self.gi[t]
            if t > 0:
                self.state[t] += self.gf[t] * self.state[t - 1]
                self.gox[t] += self.WOP * self.state[t]
            self.go[t] = ffunc(self.gox[t])
            self.output[t] = hfunc(self.state[t]) * self.go[t]

        assert not isnan(self.output[:n]).any()
        return self.output[:n]
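For readers who want the equations the loop implements without the bookkeeping, a single time step of this peephole LSTM can be written out directly. The sketch assumes ffunc is the logistic sigmoid and gfunc/hfunc are tanh (the usual choices); the weight names mirror the attributes above, but the arrays here are random placeholders:

from numpy import concatenate, dot, exp, tanh
from numpy.random import randn

def sigmoid(x):
    return 1.0/(1.0 + exp(-x))

ni, ns = 4, 3                                      # input and state sizes (placeholders)
WGI, WGF, WGO, WCI = [randn(ns, 1 + ni + ns) for _ in range(4)]
WIP, WFP, WOP = randn(ns), randn(ns), randn(ns)    # peephole weights

x_t, prev_out, prev_state = randn(ni), randn(ns), randn(ns)
source = concatenate(([1.0], x_t, prev_out))       # bias, input, recurrent output

gi = sigmoid(dot(WGI, source) + WIP*prev_state)    # input gate
gf = sigmoid(dot(WGF, source) + WFP*prev_state)    # forget gate
ci = tanh(dot(WCI, source))                        # candidate cell input
state = ci*gi + gf*prev_state                      # new cell state
go = sigmoid(dot(WGO, source) + WOP*state)         # output gate peeks at the new state
output = tanh(state)*go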
Example #29
File: ism.py Project: peterhm/gbd
    def mu_age_p(logit_C0=logit_C0, i=rate["i"]["mu_age"], r=rate["r"]["mu_age"], f=rate["f"]["mu_age"]):

        # for acute conditions, it is silly to use ODE solver to
        # derive prevalence, and it can be approximated with a simple
        # transformation of incidence
        if r.min() > 5.99:
            return i / (r + m_all + f)

        C0 = mc.invlogit(logit_C0)

        x = pl.hstack((i, r, f, 1 - C0, C0))
        y = fun.forward(0, x)

        susceptible = y[:N]
        condition = y[N:]

        p = condition / (susceptible + condition)
        p[pl.isnan(p)] = 0.0
        return p
def validate_complex_model(N_rep=20, simulation=good_complex_sim):
    q = pandas.DataFrame()
    for n in range(N_rep):
        # simulate data and fit model
        d, m = simulation()

        # tally posterior quantiles
        results = {}

        for var in 'eta_cross_eta eta delta_mu delta_beta beta gamma mu sigma'.split():
            for j, var_j in enumerate(d[var]):
                stats = m[var].stats()
                results['%s_%d'%(var, j)] = [(var_j > m[var].trace()[:,j]).sum() / float(stats['n'])]
        
        # add y_mis
        k = 0
        for j, n_j in enumerate(d['n']):
            for i in range(n_j):
                if pl.isnan(m['y'][j][i]):
                    results['y_mis_%d'%k] = [(d['y'][j][i] > m['y_pred'][j].trace()[:,i]).sum() / float(stats['n'])]
                    k += 1

        q = q.append(pandas.DataFrame(results, index=['q_rep_%d'%n]))


    results = validation_transform(q)

    # display results
    graphics.scalar_validation_statistics(
        results, 
        [[r'$y_{mis}$', results.filter(like='y_mis').columns],
         [r'$\eta\times\eta$', results.filter(like='eta_cross_eta').columns],
         [r'$\eta$', results.filter(regex='eta_\d').columns],
         [r'$\delta_\mu$', results.filter(like='delta_mu').columns],
         [r'$\delta_\beta$', results.filter(like='delta_beta').columns],
         [r'$\sigma$', results.filter(like='sigma').columns],
         [r'$\beta$', results.filter(regex='^beta').columns],
         [r'$\gamma$', results.filter(regex='gamma').columns],
         [r'$\mu$', results.filter(regex='^mu').columns],
         ])

    return results