def get_trials(params, n_rep=100000):
    '''
    Builds one Go-response facilitation curve per simulated trial (serial version).

    Parameters
    -------------
    params : sequence (6,) of float
        k_facGo - scale of fac curve
        pre_t_mean - average start time before target presentation
        pre_t_sd - standard deviation of start time before target
        tau_facGo - forwarded unchanged to fast.get_fac
            (presumably the curve's time constant - TODO confirm)
        inhib, inhib_sd - must be present so that params has six entries,
            but neither value is used in this function
    n_rep : int
        number of simulated trials, i.e. rows in the returned array

    Returns
    --------
    fac_i : array (n_rep, 600)
        facilitation curves for all simulated trials, one row per trial
    t : array (600,)
        sequence of time index, from -0.4 up to (not including) 0.2
    '''
    # Unpacking all six names enforces the expected length of params.
    k_facGo, pre_t_mean, pre_t_sd, tau_facGo, inhib, inhib_sd = params

    # Common time axis shared by every trial.
    t = np.linspace(-.4, .2, 600, endpoint=False)

    # One randomly drawn start time per simulated trial.
    onsets = np.random.normal(pre_t_mean, pre_t_sd, size=n_rep)

    fac_i = np.zeros((n_rep, t.size))
    for trial, onset in enumerate(onsets):
        # Shared model parameters plus this trial's own start time.
        fac_i[trial] = fast.get_fac(t, (k_facGo, tau_facGo, onset))

    return fac_i, t
def get_trials(params, n_rep=100000):
    '''
    Generates n_rep facilitation curves for the Go response using the CUDA kernel.

    Parameters
    -------------
    params : sequence (5,) of float
        k_facGo - scale of fac curve
        pre_t_mean - average start time before target presentation
        pre_t_sd - standard deviation of start time before target
        tau_facGo - forwarded unchanged to the facilitation-curve routines
            (presumably the curve's time constant - TODO confirm)
        inhib - must be present so that params has five entries, but it is
            not used in this function
    n_rep : int
        number of simulated trials, i.e. rows in the returned array

    Returns
    --------
    fac_i_parallel : array (n_rep, 600) of float32
        facilitation curves for all simulated trials, as copied back from
        the device after get_fac_cuda has run
    t : array (600,) of float32
        sequence of time index, from -0.4 up to (not including) 0.2

    Notes
    ------
    When the module-level PAR_TEST flag is true the curves are also computed
    serially with fast.get_fac, and the timings, element-wise difference and
    speed-up of the two approaches are printed.
    '''
    # Unpacking all five names enforces the expected length of params.
    k_facGo, pre_t_mean, pre_t_sd, tau_facGo, inhib = params

    # Everything is float32 so host and device buffers share one dtype.
    t = np.linspace(-.4, .2, 600, endpoint=False, dtype=np.float32)
    pre_t = np.random.normal(pre_t_mean, pre_t_sd, size=n_rep).astype(np.float32)
    fac_i_parallel = np.zeros((n_rep, t.size), dtype=np.float32)

    if PAR_TEST:
        # Serial reference run, used only to validate and time the kernel.
        fac_i = np.zeros((n_rep, t.size), dtype=np.float32)
        t_start = time()
        for i in range(n_rep):  # for each simulated trial
            myparams = k_facGo, tau_facGo, pre_t[i]
            fac_i[i] = fast.get_fac(t, myparams)
        t_end = time()
        s_time = t_end - t_start
        print("Serial time: %.3f s" % s_time)

    # Setup CUDA variables
    tpb_x = 8  # threads per block in x dimension
    tpb_y = 8  # threads per block in y dimension
    block_dim = tpb_x, tpb_y
    # The "+ 1" guarantees the grid covers a trailing partial block; the
    # kernel is handed n_rep and len(t), presumably to bounds-check the
    # surplus threads - TODO confirm against get_fac_cuda.
    bpg_x = n_rep // tpb_x + 1  # block grid x dimension
    bpg_y = t.size // tpb_y + 1  # block grid y dimension
    grid_dim = bpg_x, bpg_y

    t_start = time()
    stream = cuda.stream()
    # Transfers and the launch are all queued on one stream; leaving the
    # auto_synchronize block waits for that stream to drain, so the host
    # array is complete before the timer stops.
    with stream.auto_synchronize():
        d_fac = cuda.to_device(fac_i_parallel, stream)
        d_t = cuda.to_device(t, stream)
        d_pre_t = cuda.to_device(pre_t, stream)
        print("CUDA kernel: Block dim: ({tx}, {ty}), Grid dim: ({gx}, {gy})".format(tx=tpb_x, ty=tpb_y, gx=bpg_x, gy=bpg_y))
        # Pass the device copies (not the host arrays) so the launch does not
        # trigger a second, implicit upload/download of t and pre_t.
        get_fac_cuda[grid_dim, block_dim, stream](d_fac, n_rep, d_t, len(t), k_facGo, tau_facGo, d_pre_t)
        # NOTE(review): to_host is the legacy device-array API and appears to
        # write back into fac_i_parallel; newer Numba releases spell this
        # copy_to_host(fac_i_parallel, stream=stream) - confirm installed version.
        d_fac.to_host(stream)
    t_end = time()
    c_time = t_end - t_start
    print("CUDA time: %.3f s" % c_time)

    if PAR_TEST:
        print("Difference betwwen fac_i and fac_i_parallel")
        print(fac_i - fac_i_parallel)
        # Single pre-formatted argument keeps the output identical whether
        # print is a statement (Python 2) or a function (Python 3).
        print("Close enough?  %s" % np.allclose(fac_i, fac_i_parallel, rtol=0, atol=1e-05))
        print("Speed up: %.3f x" % (s_time / c_time))

    return fac_i_parallel, t