def write_data(self, filename='sim.data.R', n_model=100):
        """Serialize the simulated GRB dataset to an R-dump file for Stan.

        Parameters
        ----------
        filename : str
            Destination path for the R-dump file.
        n_model : int
            Number of grid points on the model Ep axis.
        """
        # Key insertion order is kept identical to the original so the
        # R-dump file layout does not change.
        data = {
            'N': len(self._ep_obs),
            'N_grbs': self._N_grbs,
            'grb_length': self._grb_lengths,
            'gamma_mu': self._gamma_mu,
            'gamma_sigma': self._gamma_sigma,
            'norm_mu': self._norm_mu,
            'norm_sigma': self._norm_sigma,
            'gamma': self._gammas,
            'norm': self._norms,
            'ep_obs': self._ep_obs,
            'ep_err': self._ep_err,
            'lum_obs': self._luminosity_obs,
            'lum_err': self._luminosity_err,
            'N_model': int(n_model),
            'ep_model': np.linspace(.5, 3.5, n_model),
        }

        pystan.stan_rdump(data, filename)
# ---- Esempio n. 2 (Example no. 2 — snippet separator from the source scrape; score: 0) ----
# Worked example: Bayesian inference end to end with Stan.

# --- Step 1: simulate a single observation and persist it --------------
N = 1

simu_model = stan_utility.compile_model('simulate_data.stan')
simu = simu_model.sampling(data=dict(N=N), iter=1, chains=1,
                           seed=4838282, algorithm="Fixed_param")

# Flatten the simulated draw and write it out as an R-dump file.
data = dict(N=N, y=simu.extract()['y'].flatten())
pystan.stan_rdump(data, 'simulation.data.R')

# --- Step 2: read the data back and fit with Hamiltonian Monte Carlo ---
input_data = pystan.read_rdump('simulation.data.R')

model = stan_utility.compile_model('fit_data.stan')
fit = model.sampling(data=input_data, seed=4938483)

# Run the full battery of sampler diagnostics.
stan_utility.check_all_diagnostics(fit)

# The diagnostics are not clean: split the draws into divergent and
# non-divergent sets to investigate where the sampler struggles.
nondiv_params, div_params = stan_utility.partition_div(fit)
# NOTE(review): fragment of a larger script — `models_store`, `hdf_label`,
# `formula_variables`, `count_variables`, `entries`, `ind`, `N`, `formula`,
# `model`, `n_observations`, `model_directory`, `dmatrix`, `asarray`,
# `stan_rdump` and `submit_cmdstan_jobs` are all defined elsewhere.
models_store['%s/formula_variables'%hdf_label] = pd.Series(formula_variables)


# Fit one Stan count model per count variable, dispatching each as a
# CmdStan batch job.
for count_variable in count_variables:
    # Column holding the per-year/class baseline rate for this count.
    baseline = count_variable+'_Mean_for_Year_and_Class_of_New_Immigrants_to_Class'

    # Design matrix built from the patsy formula over the selected rows.
    # NOTE(review): DataFrame.ix is deprecated/removed in modern pandas —
    # confirm the pandas version in use or migrate to .loc.
    predictors = dmatrix(formula, entries.ix[ind])
    stan_data = {'y': asarray(entries.ix[ind, count_variable].astype('int')),
                 'x': asarray(predictors),
                 'N': N,
                 'K': predictors.shape[1],
                 'baseline': asarray(entries.ix[ind,baseline])
             }

    # NOTE(review): despite the .stan suffix this file holds R-dump *data*
    # (written via stan_rdump), not a model — presumably intentional; verify.
    data_file = 'counts_data_{0}_{1}.stan'.format(count_variable, n_observations)
    stan_rdump(stan_data, model_directory+data_file)
    submit_cmdstan_jobs(model,data_file)


# In[12]:

# Joint model over all count variables.
# NOTE(review): this excerpt is truncated — the formula_variables list below
# is cut off mid-literal (no closing bracket in this chunk); a complete
# version of the same assignment appears later in this file.
model = 'joint_counts_sampling_model'
hdf_label = 'joint_counts'
formula_variables = ['Relatedness',
                     'np.power(Relatedness, 2)',
                     'Popularity',
                     'np.power(Popularity, 2)',
                     'log(Agent_Previous_Citations_to_Class+1)',
                     'log(Agent_Productivity_Patents)',
                     'log(CoAgent_Previous_Patent_Count_in_Class+1)',
                     'Guided',
# ---- Esempio n. 4 (Example no. 4 — snippet separator from the source scrape; score: 0) ----
#
# One-dimensional
#
############################################################

############################################################
# Generate data
############################################################

model = stan_utility.compile_model('generate_data.stan')
fit = model.sampling(algorithm='Fixed_param', iter=1, chains=1, seed=194838)

# Pull the simulated sample size and observations out of the single draw.
draws = fit.extract()
data = {'N': draws['N'].astype(numpy.int64),
        'x_obs': draws['x_obs'][0, :]}

pystan.stan_rdump(data, 'selection.data.R')

############################################################
# Fit the selection model
############################################################

data = pystan.read_rdump('selection.data.R')

model = stan_utility.compile_model('selection.stan')
fit = model.sampling(data=data, chains=4, seed=4938483,
                     control=dict(adapt_delta=0.9, max_treedepth=12))

# Run the full battery of sampler diagnostics.
stan_utility.check_all_diagnostics(fit)

# Default visual summaries
# ---- Esempio n. 5 (Example no. 5 — snippet separator from the source scrape; score: 0) ----
    def sampling(self, data=None, chains=4, iter=2000, warmup=None, thin=1, \
                 save_warmup=False, sample_file=None, algorithm=None, wait_during_sampling=False, \
                 init=None, init_file=None, output_file=None, args=None):
        """Run CmdStan's `sample` method and wrap the output CSVs.

        Exactly one of `data` (dict or pandas.DataFrame) or `sample_file`
        (path to an existing R-dump file) must be given.  Initial values may
        be supplied as `init` (dict/DataFrame) or `init_file` (path), not both.

        Returns a StanFit4model over the per-chain output CSVs.
        NOTE: chains are launched as background shell jobs (trailing '&')
        unless `wait_during_sampling` is True for the final chain, so the
        CSVs may still be incomplete when this method returns.
        """
        # --- data: dump to an R-dump file, or use the one supplied ---
        if ((data is not None) and (sample_file is not None)) or ((data is None) and (sample_file is None)):
            raise Exception('Exactly one of data or sample_file must be specified.')
        if data is not None:
            if isinstance(data, dict):
                data_dict = data
            elif isinstance(data, pandas.DataFrame):
                data_dict = data.to_dict()
            else:
                raise Exception('data must be a dict or a pandas.DataFrame.')
            self.sample_file = '.input.data.R'
            pystan.stan_rdump(data_dict, self.sample_file)
        else:
            self.sample_file = sample_file

        # --- initial values: optional ' init=<file> ' command fragment ---
        if (init is not None) and (init_file is not None):
            raise Exception('Initial parameters should be specified either by init (dict) or init_file (file name)')
        init_command = ''
        if init_file is not None:
            self.init_file = init_file
            init_command = ' init=' + self.init_file + ' '
        elif init is not None:
            if isinstance(init, dict):
                init_dict = init
            elif isinstance(init, pandas.DataFrame):
                init_dict = init.to_dict()
            else:
                raise Exception('init must be a dict or a pandas.DataFrame.')
            self.init_file = '.init.param.R'
            pystan.stan_rdump(init_dict, self.init_file)
            init_command = ' init=' + self.init_file + ' '

        # PyStan and CmdStan count iterations differently: CmdStan's
        # num_samples is the number of *post-warmup* draws.
        if warmup is None:
            warmup = iter // 2
        num_samples = iter - warmup
        num_warmup = warmup

        # --- algorithm selection ---
        if algorithm == 'Fixed_param':
            algorithm_and_engine = 'algorithm=fixed_param'
        elif (algorithm == 'NUTS') or (algorithm is None):
            algorithm_and_engine = 'algorithm=hmc engine=nuts'
        elif algorithm == 'HMC':
            algorithm_and_engine = 'algorithm=hmc engine=static'
        else:
            raise Exception('algorithm must be one of Fixed_param, NUTS (default), and HMC.')

        # --- output file prefix ---
        if output_file is None:
            output_file = 'output'

        for i in range(chains):
            command = './' + self.model_name + ' id=' + str(i + 1) + ' sample '
            command += 'num_samples=' + str(num_samples) + ' num_warmup=' + str(num_warmup)
            # BUGFIX: `thin` was previously accepted but never forwarded to CmdStan.
            command += ' thin=' + str(thin)
            if save_warmup:
                command += ' save_warmup=1'
            command += ' ' + algorithm_and_engine
            if args is not None:
                command += ' ' + args
            command += ' data file=' + self.sample_file + ' output file=' + output_file + str(i + 1) + '.csv'
            command += init_command
            # Run in the background unless this is the last chain and the
            # caller asked to wait for sampling to finish.
            if (not wait_during_sampling) or (i < chains - 1):
                command += '&'
            command += '\n'
            os.system(command)
            print(command)  # BUGFIX: function-call form (was Python-2 `print command`)

        outputFiles = []
        for i in range(1, chains + 1):
            outputFiles.append(output_file + str(i) + '.csv')
        return StanFit4model(outputFiles)
# ---- Esempio n. 6 (Example no. 6 — snippet separator from the source scrape; score: 0) ----
    def variational(self, data=None, sample_file=None, \
                    algorithm='meanfield', iter=10000, \
                    grad_samples=1, elbo_samples=100, eta=1.0, \
                    tol_rel_obj=0.01, output_samples=1000, \
                    init=None, init_file=None, output_file=None,\
                    args=None):
        """Run CmdStan's variational inference (ADVI) and wrap the output CSV.

        Exactly one of `data` (dict / pandas.DataFrame) or `sample_file`
        (path to an R-dump file) must be supplied; likewise at most one of
        `init` and `init_file`.
        """
        given_both = (data is not None) and (sample_file is not None)
        given_neither = (data is None) and (sample_file is None)
        if given_both or given_neither:
            raise Exception('Exactly one of data or sample_file must be specified.')

        # Resolve the data file: either the caller's R-dump, or one we write.
        if data is None:
            data_path = sample_file
        else:
            if isinstance(data, dict):
                data_as_dict = data
            elif isinstance(data, pandas.DataFrame):
                data_as_dict = data.to_dict()
            else:
                raise Exception('data must be a dict or a pandas.DataFrame.')
            data_path = '.input.data.R'
            pystan.stan_rdump(data_as_dict, data_path)

        # Optional initial values -> ' init=<file> ' command fragment.
        if (init is not None) and (init_file is not None):
            raise Exception('Initial parameters should be specified either by init (dict) or init_file (file name)')
        init_command = ''
        if init_file is not None:
            self.init_file = init_file
            init_command = ' init=' + self.init_file + ' '
        elif init is not None:
            if isinstance(init, dict):
                init_as_dict = init
            elif isinstance(init, pandas.DataFrame):
                init_as_dict = init.to_dict()
            else:
                raise Exception('init must be a dict or a pandas.DataFrame.')
            self.init_file = '.init.param.R'
            pystan.stan_rdump(init_as_dict, self.init_file)
            init_command = ' init=' + self.init_file + ' '

        if output_file is None:
            output_file = 'output'

        # Assemble the CmdStan command line; each fragment carries its own
        # leading space, so the joined string matches CmdStan's syntax.
        pieces = ['./' + self.model_name + ' variational',
                  ' algorithm=' + algorithm.lower(),
                  ' iter=' + str(iter)]
        if grad_samples is not None:
            pieces.append(' grad_samples=' + str(grad_samples))
        if elbo_samples is not None:
            pieces.append(' elbo_samples=' + str(elbo_samples))
        if eta is not None:
            pieces.append(' eta=' + str(eta))
        pieces.append(' tol_rel_obj=' + str(tol_rel_obj))
        if args is not None:
            pieces.append(' ' + args)
        pieces.append(' output_samples=' + str(output_samples))
        pieces.append(' data file=' + data_path)
        pieces.append(init_command)
        pieces.append(' output file=' + output_file + '.csv')

        os.system(''.join(pieces))  # writes <output_file>.csv

        return StanFit4model([output_file + '.csv'])
# ---- Esempio n. 7 (Example no. 7 — snippet separator from the source scrape; score: 0) ----
    def optimizing(self, data=None, sample_file=None, algorithm=None, iter=2000, \
                   init=None, init_file=None, output_file=None, args=None ):
        """Run CmdStan's `optimize` method and return the point estimate.

        Exactly one of `data` (dict / pandas.DataFrame) or `sample_file`
        (path to an R-dump file) must be given; at most one of `init` /
        `init_file`.  Returns an OrderedDict of optimized values (mirroring
        PyStan's optimizing()), with the 'lp__' column removed.
        """
        # --- data: dump to an R-dump file, or use the one supplied ---
        # (fixed comment: this writes a *data* file, not a .stan model file)
        if ((data is not None) and (sample_file is not None)) or ((data is None) and (sample_file is None)):
            raise Exception('Exactly one of data or sample_file must be specified.')
        if data is not None:
            if isinstance(data, dict):
                data_dict = data
            elif isinstance(data, pandas.DataFrame):
                data_dict = data.to_dict()
            else:
                raise Exception('data must be a dict or a pandas.DataFrame.')
            sampleFileName = '.input.data.R'
            pystan.stan_rdump(data_dict, sampleFileName)
        else:
            sampleFileName = sample_file

        # --- initial values: optional ' init=<file> ' command fragment ---
        # (removed a duplicated copy of this comment)
        if (init is not None) and (init_file is not None):
            raise Exception('Initial parameters should be specified either by init (dict) or init_file (file name)')
        init_command = ''
        if init_file is not None:
            self.init_file = init_file
            init_command = ' init=' + self.init_file + ' '
        elif init is not None:
            if isinstance(init, dict):
                init_dict = init
            elif isinstance(init, pandas.DataFrame):
                init_dict = init.to_dict()
            else:
                raise Exception('init must be a dict or a pandas.DataFrame.')
            self.init_file = '.init.param.R'
            pystan.stan_rdump(init_dict, self.init_file)
            init_command = ' init=' + self.init_file + ' '

        # --- output file prefix ---
        if output_file is None:
            output_file = 'output'

        if (algorithm is not None) and not isinstance(algorithm, str):
            raise Exception('algorithm must be a string.')
        elif algorithm is None:
            algorithm = 'LBFGS'

        command = './' + self.model_name + ' optimize '
        command += 'algorithm=' + algorithm.lower()
        if args is not None:
            command += ' ' + args
        command += ' iter=' + str(iter)
        command += ' data file=' + sampleFileName
        command += init_command
        command += ' output file=' + output_file + '.csv'

        os.system(command)  # writes <output_file>.csv

        outputDataFrame = pandas.read_csv(output_file + '.csv', comment='#')
        retDict = outputDataFrame.to_dict()
        # Drop the log-probability column; pop() is robust if it is absent.
        retDict.pop('lp__', None)

        # PyStan's optimizing() returns an OrderedDict, so mimic that.
        return collections.OrderedDict(retDict)
# NOTE(review): fragment of a larger script — `count_variables`, `entries`,
# `ind`, `N`, `formula`, `model`, `n_observations`, `model_directory`,
# `models_store`, `pd`, `dmatrix`, `asarray`, `stan_rdump` and
# `submit_cmdstan_jobs` are defined elsewhere.

# Fit one Stan count model per count variable, dispatching each as a
# CmdStan batch job.
for count_variable in count_variables:
    # Column holding the per-year/class baseline rate for this count.
    baseline = count_variable + '_Mean_for_Year_and_Class_of_New_Immigrants_to_Class'

    # Design matrix built from the patsy formula over the selected rows.
    # NOTE(review): DataFrame.ix is deprecated/removed in modern pandas —
    # confirm the pandas version in use or migrate to .loc.
    predictors = dmatrix(formula, entries.ix[ind])
    stan_data = {
        'y': asarray(entries.ix[ind, count_variable].astype('int')),
        'x': asarray(predictors),
        'N': N,
        'K': predictors.shape[1],
        'baseline': asarray(entries.ix[ind, baseline])
    }

    # NOTE(review): despite the .stan suffix this file holds R-dump *data*
    # (written via stan_rdump), not a model — presumably intentional; verify.
    data_file = 'counts_data_{0}_{1}.stan'.format(count_variable,
                                                  n_observations)
    stan_rdump(stan_data, model_directory + data_file)
    submit_cmdstan_jobs(model, data_file)

# In[12]:

# Joint model over all count variables: assemble the patsy formula and
# record which variables it uses.
model = 'joint_counts_sampling_model'
hdf_label = 'joint_counts'
formula_variables = [
    'Relatedness', 'np.power(Relatedness, 2)', 'Popularity',
    'np.power(Popularity, 2)', 'log(Agent_Previous_Citations_to_Class+1)',
    'log(Agent_Productivity_Patents)',
    'log(CoAgent_Previous_Patent_Count_in_Class+1)', 'Guided', 'log(N_Agents)'
]
formula = " + ".join(formula_variables)
models_store['%s/formula_variables' % hdf_label] = pd.Series(formula_variables)
# ---- Esempio n. 9 (Example no. 9 — snippet separator from the source scrape; score: 0) ----
        
# NOTE(review): fragment — `data_dict` and every name on the right-hand
# side are defined earlier in the original script.
# Populate the Stan data dictionary in one shot; key insertion order and
# values are identical to the original per-key assignments.
data_dict.update({
    'N_all': int(total_number_of_channels_used),
    'N_grbs': len(np.unique(grb_number)),
    'max_n_echan': max_n_echans,
    'max_n_chan': max_n_chans,
    'N_dets': n_dets,
    'N_chan': n_chan.astype(int),
    'N_echan': n_echan.astype(int),
    'observed_counts': observed_counts,
    'background_counts': background_counts,
    'idx_background_nonzero': idx_background_nonzero.astype(int),
    'idx_background_zero': idx_background_zero.astype(int),
    'N_bkg_zero': n_bkg_zero.astype(int),
    'N_bkg_nonzero': n_bkg_nonzero.astype(int),
    'object_idx': np.array(grb_number).astype(int),
    'background_errors': background_errors,
    'ebounds_lo': ebounds_lo,
    'ebounds_hi': ebounds_hi,
    'cbounds_lo': cbounds_lo,
    'cbounds_hi': cbounds_hi,
    'exposure': exposures,
    'response': responses,
    'mask': masks.astype(int),
    'N_channels_used': n_channels_used.astype(int),
    'grb_id': np.array(grb_number).astype(int),
    'dl': dl,
    'z': z,
})

# Serialize everything to an R-dump file for Stan.
pystan.stan_rdump(data_dict, 'all_data.R')