# Example #1
    def stats(self,
              alpha=0.05,
              start=0,
              batches=100,
              chain=None,
              quantiles=(2.5, 25, 50, 75, 97.5)):
        """
        Generate posterior statistics for node.

        :Parameters:
        alpha : float
          The alpha level for generating posterior intervals. Defaults to
          0.05.

        start : int
          The starting index from which to summarize (each) chain. Defaults
          to zero.

        batches : int
          Batch size for calculating standard deviation for non-independent
          samples. Defaults to 100.

        chain : int
          The index for which chain to summarize. Defaults to None (all
          chains).

        quantiles : tuple or list
          The desired quantiles to be calculated. Defaults to
          (2.5, 25, 50, 75, 97.5).

        :Returns:
        dict of summary statistics, or None when the trace is empty or the
        statistics could not be computed.
        """

        try:
            # Pull the (possibly multi-chain) trace from the backend and
            # drop the first `start` samples as burn-in.
            trace = np.squeeze(
                np.array(self.db.trace(self.name)(chain=chain), float))[start:]

            n = len(trace)
            if not n:
                print_('Cannot generate statistics for zero-length trace in',
                       self.__name__)
                return

            return {
                'n': n,
                'standard deviation': trace.std(0),
                'mean': trace.mean(0),
                '%s%s HPD interval' % (int(100 * (1 - alpha)), '%'):
                    utils.hpd(trace, alpha),
                # Clamp the batch count to the trace length so batchsd()
                # never sees more batches than samples (matches the other
                # stats() implementation in this file).
                'mc error': batchsd(trace, min(n, batches)),
                'quantiles': utils.quantiles(trace, qlist=quantiles)
            }
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit propagate instead of being swallowed.
            print_('Could not generate output statistics for', self.name)
            return
# Example #2
def analyze(parameters, datasets):
    """Post-process MCMC traces stored under Data/<sumatra_label>.

    Writes one autocorrelation plot per sampled parameter, a corner plot,
    a CSV posterior summary, and comparison figures; appends each output's
    relative path to the module-level `output_files` list.
    """
    label = parameters['sumatra_label']
    image_path = os.path.join('Data', label)
    # Save traces
    trace_file = str(os.path.join('Data', label, 'traces.h5'))
    os.makedirs(os.path.join(image_path, 'acf'))

    # Load every sampled parameter, skipping the step-method bookkeeping
    # columns and the model deviance.
    data_dict = OrderedDict()
    with tables.open_file(trace_file, mode='r') as h5:
        samples = h5.root.chain0.PyMCsamples
        parnames = sorted(c for c in samples.colnames
                          if not c.startswith('Metropolis') and c != 'deviance')
        for name in parnames:
            data_dict[name] = np.asarray(samples.read(field=name),
                                         dtype='float')

    # One autocorrelation plot per parameter.
    for name, trace in data_dict.items():
        acf_fig = plt.figure()
        axis = acf_fig.gca()
        axis.plot(autocorr(trace))
        axis.set_title(name + ' Autocorrelation')
        acf_fig.savefig(str(os.path.join(image_path, 'acf', name + '.png')))
        plt.close(acf_fig)
        output_files.append(str(os.path.join(label, 'acf', name + '.png')))

    # Corner (pairwise marginal) plot; 'compare' entries become truth lines.
    samples_2d = np.vstack(list(data_dict.values())).T
    param_config = parameters.as_dict()['parameters']
    data_truths = [param_config[key].get('compare', None) for key in data_dict]
    corner_fig = corner(samples_2d, labels=list(data_dict.keys()),
                        quantiles=[0.16, 0.5, 0.84],
                        truths=data_truths,
                        show_titles=True, title_args={"fontsize": 40},
                        rasterized=True)
    corner_fig.savefig(str(os.path.join(image_path, 'cornerplot.png')))
    output_files.append(str(os.path.join(label, 'cornerplot.png')))
    plt.close(corner_fig)

    # Write CSV file with parameter summary (should be close to pymc's format)
    fieldnames = ['Parameter', 'Mean', 'SD', 'Lower 95% HPD', 'Upper 95% HPD',
                  'MC error', 'q2.5', 'q25', 'q50', 'q75', 'q97.5']
    with open(str(os.path.join(image_path, 'parameters.csv')), 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames)
        writer.writeheader()
        for parname, trace in data_dict.items():
            qxx = utils.quantiles(trace, qlist=(2.5, 25, 50, 75, 97.5))
            lower_hpd, upper_hpd = utils.hpd(trace, 0.05)
            row = {
                'Parameter': parname,
                'Mean': trace.mean(0),
                'SD': trace.std(0),
                'Lower 95% HPD': lower_hpd,
                'Upper 95% HPD': upper_hpd,
                'MC error': batchsd(trace, min(len(trace), 100)),
            }
            for q in (2.5, 25, 50, 75, 97.5):
                row['q{}'.format(q)] = qxx[q]
            writer.writerow(row)
    output_files.append(str(os.path.join(label, 'parameters.csv')))

    # Generate comparison figures
    os.makedirs(os.path.join(image_path, 'results'))
    input_database = Database(parameters['input_database'])
    compare_databases = {k: Database(v)
                         for k, v in parameters['compare_databases'].items()}
    for idx, fig in enumerate(plot_results(input_database, datasets, data_dict,
                                           databases=compare_databases),
                              start=1):
        figname = 'Figure{}.png'.format(idx)
        fig.savefig(str(os.path.join(image_path, 'results', figname)))
        output_files.append(str(os.path.join(label, 'results', figname)))
        plt.close(fig)
# Example #3
    def stats(self, alpha=0.05, start=0, batches=100,
              chain=None, quantiles=(2.5, 25, 50, 75, 97.5)):
        """
        Generate posterior statistics for node.

        :Parameters:
        alpha : float
          The alpha level for generating posterior intervals. Defaults to
          0.05.

        start : int
          The starting index from which to summarize (each) chain. Defaults
          to zero.

        batches : int
          Batch size for calculating standard deviation for non-independent
          samples. Defaults to 100.

        chain : int
          The index for which chain to summarize. Defaults to None (all
          chains).

        quantiles : tuple or list
          The desired quantiles to be calculated. Defaults to
          (2.5, 25, 50, 75, 97.5).

        :Returns:
        dict of summary statistics, or None when the trace is empty or the
        statistics could not be computed.
        """

        try:
            # Pull the (possibly multi-chain) trace from the backend and
            # drop the first `start` samples as burn-in.
            trace = np.squeeze(
                np.array(self.db.trace(self.name)(chain=chain), float))[start:]

            n = len(trace)
            if not n:
                print_(
                    'Cannot generate statistics for zero-length trace in',
                    self.__name__)
                return

            return {
                'n': n,
                'standard deviation': trace.std(0),
                'mean': trace.mean(0),
                '%s%s HPD interval' % (int(100 * (1 - alpha)), '%'):
                    utils.hpd(trace, alpha),
                # Batch count is clamped so batchsd() never sees more
                # batches than samples.
                'mc error': batchsd(trace, min(n, batches)),
                'quantiles': utils.quantiles(trace, qlist=quantiles)
            }
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit propagate instead of being swallowed.
            print_('Could not generate output statistics for', self.name)
            return
def precipProxy(scores, flag):
    """Calibrate a linear reconstruction of a climate variable against a
    tree-ring chronology and write the reconstruction to CSV files.

    :Parameters:
    scores : pandas.Series
      Chronology scores indexed by year.
    flag : str
      'pdsi' to calibrate against the PDSI series, anything else
      (typically 'precip') to calibrate against precipitation.

    Side effects: runs an MCMC regression calibration and writes several
    CSV files under csv/.
    """

    # Calibration window: overlap of the proxy and instrumental series.
    # NOTE(review): pandas .ix was removed in pandas >= 1.0; this file
    # clearly targets an old pandas, so it is kept as-is.
    scores_late = scores.ix[1901:1981]

    if flag == 'pdsi':
        pdsi = pandas.read_csv(base_path + 'csv/jjPdsi.csv', index_col=[0])
        climVar = pdsi['p'].ix[1901:1981].values
        # BUGFIX: the original defined climVar_anom only in the else branch,
        # so flag == 'pdsi' crashed with NameError in the rescaling step
        # below.  Only climVar_anom.std() is ever used, which is invariant
        # to the constant offset, so centering on the own mean is safe.
        climVar_anom = climVar - climVar.mean()
    else:
        precip = pandas.read_csv(base_path + 'csv/mjPrecip.csv', index_col=[0])
        # Anomaly relative to the 1961-1990 reference-period mean.
        ref_mean = np.mean(precip['precip'].ix[1961:1990].values)
        climVar = precip['precip'].ix[1901:1981].values
        climVar_anom = climVar - ref_mean * np.ones(np.shape(climVar)[0])

    # Center the climate variable (the regression target).  BUGFIX: this was
    # also only defined in the else branch originally, crashing the
    # likelihood below for flag == 'pdsi'.
    climVar_cent = climVar - climVar.mean()

    # define priors
    beta = Normal('beta', mu=zeros(2), tau=.001, value=zeros(2))
    sigma = Uniform('sigma', lower=0., upper=100., value=1.)

    # define predictions
    @deterministic
    def mu(beta=beta, chron=scores_late):
        return beta[0] + beta[1] * chron

    @deterministic
    def predicted(mu=mu, sigma=sigma):
        return rnormal(mu, sigma ** -2.)

    # define likelihood
    @observed
    def y(value=climVar_cent, mu=mu, sigma=sigma):
        return normal_like(value, mu, sigma ** -2.)

    # generate MCMC samples
    vars = [beta, sigma, mu, predicted, y]
    mc = MCMC(vars)
    mc.use_step_method(Metropolis, beta)
    mc.sample(iter=20000, thin=10, burn=10000, verbose=1)

    betas = beta.trace.gettrace()
    sigmas = sigma.trace.gettrace()
    chron = scores.values
    pred = zeros((betas.shape[0], chron.shape[0]))

    # Push every posterior (beta, sigma) draw through the regression to get
    # a posterior-predictive reconstruction over the full chronology.
    for i in range(betas.shape[0]):
        pred[i, :] = predicted._eval_fun(
            mu=mu._eval_fun(beta=betas[i], chron=chron), sigma=sigmas[i])

    # Index the reconstruction by the chronology's own years.
    t = scores.index

    plot_vals = quantiles(pred, (5, 50, 95))

    months = ['Jan', 'Feb', 'Mar', 'April', 'May', 'June',
              'July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    recon = pandas.DataFrame(plot_vals[50], index=t, columns=['recon'])
    # Replicate the median reconstruction into one column per month.
    reconMonthly = pandas.DataFrame({m: plot_vals[50] for m in months},
                                    index=t)

    # NOTE(review): to_csv's `cols` keyword is the pre-0.17 pandas spelling
    # of `columns`; kept for the old pandas this file targets.
    if flag == 'precip':
        recon.to_csv('csv/reconPrecip.csv')
        reconMonthly.to_csv('csv/reconPrecipMonthly.csv', cols=months)
    else:
        recon.to_csv('csv/reconPdsi.csv')
        reconMonthly.to_csv('csv/reconPdsiMonthly.csv', cols=months)

    # Rescale the predictive spread to the instrumental anomaly variance
    # and shift back to the instrumental mean.  (Original author note:
    # "why do we need to rescale variance? i find this weird".)
    pred_std = zeros((betas.shape[0], chron.shape[0]))
    for i in range(betas.shape[0]):
        pred_std[i, :] = (pred[i, :] / plot_vals[50].std()
                          * climVar_anom.std() + climVar.mean())

    plot_vals_df = pandas.DataFrame({'pred5': plot_vals[5],
                                     'pred50': plot_vals[50],
                                     'pred95': plot_vals[95]}, index=t)
    plot_vals_df.to_csv('csv/plot_vals.csv')

    plot_vals_std = quantiles(pred_std, (5, 50, 95))
    plot_vals_std = pandas.DataFrame({'pred5': plot_vals_std[5],
                                      'pred50': plot_vals_std[50],
                                      'pred95': plot_vals_std[95]}, index=t)
    plot_vals_std.to_csv('csv/plot_vals_std.csv')
# Example #5
def analyze(parameters, datasets):
    """Post-process an MCMC run stored under Data/<sumatra_label>.

    Reads traces.h5, then writes one autocorrelation plot per sampled
    parameter (acf/<param>.png), a corner plot (cornerplot.png), a CSV
    posterior summary (parameters.csv), and comparison figures
    (results/FigureN.png).  Every relative output path is appended to the
    module-level ``output_files`` list (defined elsewhere in this file).

    :Parameters:
    parameters :
      Mapping-like run configuration; must provide 'sumatra_label',
      'input_database', 'compare_databases', and an
      ``as_dict()['parameters']`` section whose entries may carry a
      'compare' truth value for the corner plot.
    datasets :
      Passed straight through to plot_results() -- see that function.
    """
    image_path = os.path.join('Data', parameters['sumatra_label'])
    # Save traces
    trace_file = str(
        os.path.join('Data', parameters['sumatra_label'], 'traces.h5'))
    data_dict = OrderedDict()
    os.makedirs(os.path.join(image_path, 'acf'))
    # Collect each sampled parameter's trace, skipping the step-method
    # bookkeeping columns and the model deviance.
    with tables.open_file(trace_file, mode='r') as data:
        parnames = [
            x for x in data.root.chain0.PyMCsamples.colnames
            if not x.startswith('Metropolis') and x != 'deviance'
        ]
        for param in sorted(parnames):
            data_dict[param] = np.asarray(
                data.root.chain0.PyMCsamples.read(field=param), dtype='float')
    # One autocorrelation plot per parameter.
    for param, trace in data_dict.items():
        figure = plt.figure()
        figure.gca().plot(autocorr(trace))
        figure.gca().set_title(param + ' Autocorrelation')
        figure.savefig(str(os.path.join(image_path, 'acf', param + '.png')))
        plt.close(figure)
        output_files.append(
            str(
                os.path.join(parameters['sumatra_label'], 'acf',
                             param + '.png')))

    # Corner (pairwise marginal) plot; 'compare' entries, when present,
    # are drawn as truth lines.
    data = np.vstack(list(data_dict.values())).T
    data_truths = [
        parameters.as_dict()['parameters'][key].get('compare', None)
        for key in data_dict.keys()
    ]
    figure = corner(data,
                    labels=list(data_dict.keys()),
                    quantiles=[0.16, 0.5, 0.84],
                    truths=data_truths,
                    show_titles=True,
                    title_args={"fontsize": 40},
                    rasterized=True)
    figure.savefig(str(os.path.join(image_path, 'cornerplot.png')))
    output_files.append(
        str(os.path.join(parameters['sumatra_label'], 'cornerplot.png')))
    plt.close(figure)
    # Write CSV file with parameter summary (should be close to pymc's format)
    with open(str(os.path.join(image_path, 'parameters.csv')), 'w') as csvfile:
        fieldnames = [
            'Parameter', 'Mean', 'SD', 'Lower 95% HPD', 'Upper 95% HPD',
            'MC error', 'q2.5', 'q25', 'q50', 'q75', 'q97.5'
        ]
        writer = csv.DictWriter(csvfile, fieldnames)
        writer.writeheader()
        for parname, trace in data_dict.items():
            qxx = utils.quantiles(trace, qlist=(2.5, 25, 50, 75, 97.5))
            q2d5, q25, q50, q75, q975 = qxx[2.5], qxx[25], qxx[50], qxx[
                75], qxx[97.5]
            lower_hpd, upper_hpd = utils.hpd(trace, 0.05)
            row = {
                'Parameter': parname,
                'Mean': trace.mean(0),
                'SD': trace.std(0),
                'Lower 95% HPD': lower_hpd,
                'Upper 95% HPD': upper_hpd,
                # Batch size clamped to the trace length for batchsd().
                'MC error': batchsd(trace, min(len(trace), 100)),
                'q2.5': q2d5,
                'q25': q25,
                'q50': q50,
                'q75': q75,
                'q97.5': q975
            }
            writer.writerow(row)
    output_files.append(
        str(os.path.join(parameters['sumatra_label'], 'parameters.csv')))
    # Generate comparison figures
    os.makedirs(os.path.join(image_path, 'results'))
    input_database = Database(parameters['input_database'])
    compare_databases = {
        key: Database(value)
        for key, value in parameters['compare_databases'].items()
    }
    idx = 1
    for fig in plot_results(input_database,
                            datasets,
                            data_dict,
                            databases=compare_databases):
        fig.savefig(
            str(os.path.join(image_path, 'results',
                             'Figure{}.png'.format(idx))))
        output_files.append(
            str(
                os.path.join(parameters['sumatra_label'], 'results',
                             'Figure{}.png'.format(idx))))
        plt.close(fig)
        idx += 1