def main(hdf, name):
    """Tabulate histograms of selected design matrix columns into a csv.

    For each condition of interest, the first model in <hdf> whose 'dm'
    metadata lists that condition is used. That column is pulled from the
    model's design matrix data, histogrammed with an RHist (2 decimals),
    and written to '<name>.csv' as rows of (x, count, cond).

    Conditions that no model provides are silently left out of the table.
    """
    # The with-block guarantees the csv is flushed and closed even if
    # reading the hdf or histogramming fails part way through.
    with open('{0}.csv'.format(name), 'w') as f:
        csvw = csv.writer(f)

        # Make a header for the table
        csvw.writerow(["x", "count", "cond"])

        conds = set(["acc", "value", "rpe", "p", "rand", "box"])

        # Find a dm column for each cond: pick the first model
        # that has it. Conds across models are the same.
        locations = {}
        for model in get_model_names(hdf):
            # Compare the model's dm names to the conds still wanted.
            meta = get_model_meta(hdf, model)
            for ii, dmname in enumerate(meta["dm"]):
                if dmname in conds:
                    locations[dmname] = (model, ii)
                    conds.remove(dmname)
                    ## conds loses elements, so later models
                    ## cannot overwrite an earlier match.

            # Once every cond has been located, stop searching.
            if not conds:
                print("All conds found. {0}".format(model))
                break

        # Get each dm's data and pick a column using pos;
        # pos matches cond.
        dmdata = {}
        for cond, (model, pos) in locations.items():
            print("Getting {0}".format(cond))

            dm = np.array(read_hdf(hdf, '/' + model + '/dm'))
            # dm is indexed as a 3-d array; the last axis selects the column.
            dmdata[cond] = dm[:, :, pos].flatten()

        # Create a histogram for each cond then write it out.
        for cond, data in dmdata.items():
            print("Histogramming {0}".format(cond))

            hist = RHist(name=cond, decimals=2)
            for x in data:
                hist.add(x)

            # One row per histogram bin, tagged with its cond.
            csvw.writerows([k, v, cond] for k, v in hist.h.items())
Exemple #2
0
def create_hist_list(hdf, model, stat):
    """ Create a list of RHist (histogram) objects for <model> and
    <stat> in the given <hdf>.

    If <stat> has only one entry (as is the case for 'aic') the list will have
    only one entry.  If however <stat> has n entries per model (like 't')
    the list will have n-1 entries. As n matches the number of columns in
    the design matrix, the rightmost will always correspond to the dummy
    predictor and is therefore discarded.

    List-like entries get histograms named after the matching 'dm' metadata
    column; a scalar <stat> gets a single histogram named <stat>. """

    hist_list = []  ## A list of RHist objects.
    meta = get_model_meta(hdf, model)  ## metadata for naming

    # A handle on the hdf data
    hdfdata = read_hdf(hdf, '/' + model + '/' + stat)

    # Loop over the nodes, adding the data
    # for each to a RHist.
    for node in hdfdata:
        # Some data will be list-like, some a single number.
        # Only the len() probe is guarded so that a TypeError or
        # IndexError raised while adding data is not mistaken for
        # "node is a scalar" / "histogram is missing".
        try:
            n_entries = len(node)
        except TypeError:
            # Assume a number so hist_list has only one
            # entry (i.e. 0), created on first use.
            if not hist_list:
                hist_list.append(RHist(name=stat, decimals=2))
            hist_list[0].add(node)
            continue

        # Skip the last entry (the dummy predictor).
        for ii in range(n_entries - 1):
            # Init entries in hist_list as needed
            if ii >= len(hist_list):
                hist_list.append(RHist(name=meta['dm'][ii], decimals=2))
            hist_list[ii].add(node[ii])

    return hist_list
Exemple #3
0
def hist_t(hdf, model, name=None):
    """
    Plot histograms of the t values in <hdf> for each condition in
    <model>.

    If <name> is not None the plot is saved to the path <name> in pdf
    format (<name> is used as given; no extension is appended).
    """

    meta = get_model_meta(hdf, model)

    # One RHist per design matrix column, in column order.
    hist_list = [RHist(name=dm_col, decimals=1) for dm_col in meta['dm']]

    # read_hdf_inc returns a generator so....
    tdata = read_hdf_inc(hdf, '/' + model + '/t')
    for ts in tdata:
        # get the tvals for each instance of model
        # and add them to the hist_list.
        ## The last t in ts is the constant, which we
        ## do not want to plot.
        for ii in range(len(ts) - 1):
            hist_list[ii].add(ts[ii])

    # NOTE(review): every entry of hist_list is plotted below, including
    # any that never received data above -- confirm that is intended.

    # Create a fig, loop over the hist_list
    # plotting each on fig.axes = 0.
    fig = plt.figure()
    fig.add_subplot(111)
    colors = itertools.cycle(
        ['DarkGray', 'DarkBlue', 'DarkGreen', 'MediumSeaGreen'])
    ## Using html colors...

    # next(colors) instead of colors.next(): the method form only
    # exists in Python 2, the builtin works in both 2.6+ and 3.
    for h in hist_list:
        h.plot(fig=fig, color=next(colors), norm=True)

    # Prettify the plot
    ax = fig.axes[0]
    ax.set_xlabel('t-values')
    ax.set_ylabel('P(t)')

    # Add vertical lines representing significance thresholds
    ax.axvline(x=1.7822, label='p < 0.05', color='red', linewidth=4)
    ax.axvline(x=2.6810, label='p < 0.01', color='red', linewidth=3)
    ax.axvline(x=3.0545, label='p < 0.005', color='red', linewidth=2)
    ax.axvline(x=4.3178, label='p < 0.0005', color='red', linewidth=1)
    ## tval lines assume N=12 subjects

    plt.xlim(-10, 15)
    plt.legend()
    plt.title('{0} -- BOLD: {1}'.format(model, meta['bold']))

    if name is not None:
        plt.savefig(name, format="pdf")
Exemple #4
0
def test_RHist():
    """Print summary statistics of an RHist filled with normal samples.

    Draws 10,000 samples from scipy's standard normal, feeds each one to
    an RHist (decimals=1) via incr(), prints the histogram's estimates
    next to the values given in the labels, then plots it normalized.
    """
    import numpy as np
    import scipy.stats as stats
    from bigstats.hist import RHist

    samples = stats.norm.rvs(size=10000)
    hist = RHist(name='test', decimals=1)
    for sample in samples:
        hist.incr(sample)

    print('The normal distribution was sampled 10,000 times....')

    # (label template, estimator) pairs, reported in this order.
    report = (
        ('Est. Mean (0): {0}', hist.mean),
        ('Est. Var (1): {0}', hist.var),
        ('Est. Stdev (0.01): {0}', hist.stdev),
        ('N (10000): {0}', hist.n),
    )
    for template, estimator in report:
        print(template.format(estimator()))

    print('** Decimals should be precise to 1 place.**')

    hist.plot(fig=None, norm=True)

    # TODO: Add a uniform and some asymmetric dist too.