Example 1
def test(challenge,lim=None):
    meta,desi,times_f,resid_f,error_f = load(challenge,limit=int(lim) if lim is not None else None)

    alphaab = alphamat(meta)

    print 'Working with {0} pulsars.'.format(len(meta))

    with timing('GW covariance matrix [recurring]'):
        cgw = Cgw_100ns(alphaab,times_f,-2.0/3.0,fL=1.0/500,approx_ksum=True)

    cgw2 = cgw.copy()
    with timing('Cgw interpolation'):
        cgw3 = 0.2 * cgw + 0.8 * cgw2

    with timing('PN covariance matrix'):
        cpn = Cpn(error_f)

    with timing('Design matrix'):
        gmat = Gdesi2(desi,meta)        # note this takes meta, not len(meta)

    with timing('Reduced data'):
        resid_f = N.dot(gmat.T,resid_f)

    with timing('Reduced Cpn'):
        cpn = blockmul(cpn,gmat,meta,blas=True)

    with timing('Reduced Cgw [recurring]'):
        cgw = blockmul(cgw,gmat,meta,blas=True)

    cgw2 = cgw.copy()
    with timing('Reduced-Cgw interpolation'):
        cgw3 = 0.2 * cgw + 0.8 * cgw2

    with timing('Likelihood [recurring]'):
        logl = logL(resid_f,cgw,cpn)
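
All of these snippets rely on a `timing` helper that is not shown. In Examples 1, 3, 5 and 6 it is used as a context manager, sometimes with a verbosity level and a debug flag. A minimal sketch consistent with those call sites (the message format is an assumption):

import time
from contextlib import contextmanager

@contextmanager
def timing(label, level=1, debug=1):
    # sketch: print elapsed wall-clock time for the block, gated on verbosity
    t0 = time.time()
    try:
        yield
    finally:
        if debug is True or debug >= level:
            print('{0}: {1:.2f} s'.format(label, time.time() - t0))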
Example 2
    def calculate_values(self):
        num_s = self.domain.num_states
        num_a = self.domain.num_actions
        r = self.domain.get_rewards()
        p = self.domain.get_probabilities()
        sum_probs = np.sum(p, axis=1, keepdims=True)

        if self.verbose:
            print('Value iteration begins...')

        # set starting state values
        if self.initial_values is not None:
            v_curr = self.initial_values.reshape(num_s, 1)
        else:
            v_curr = np.zeros((num_s, 1))

        self.iter_values.append((0, v_curr.reshape(num_s)))

        dist = np.inf
        i = 1
        t = time.perf_counter()
        # iterate until convergence or until the iteration cap is reached
        while dist >= self.threshold and i <= self.max_iterations:

            # noinspection PyCompatibility
            v = r + self.discount * p @ v_curr

            # group action values by state: shape (num_s, num_a, 1)
            split_values = np.array(np.split(v, num_s))
            # mask actions with zero total transition probability so the
            # max below can never select them
            for s in range(num_s):
                for a in range(num_a):
                    if sum_probs[s * num_a + a] == 0:
                        split_values[s][a] = -np.inf
            # maximizing the Bellman eq. for all actions
            v_next = np.array(list(map(np.max, split_values)))
            v_next = v_next.reshape(num_s, 1)
            dist = np.linalg.norm(v_next - v_curr, ord=np.inf)
            self.iter_values.append((i,
                                     v_next.reshape(num_s),
                                     dist))
            i += 1
            v_curr = v_next

        if self.verbose:
            timing(t)
            print('Value iteration finished:')
            print(*self.iter_values, sep='\n')

        values = self.get_value_table()
        return values
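
Examples 2, 4 and 7 instead call `timing` as a plain function on a `time.perf_counter()` start value, and Example 7 uses its return value as the elapsed time. A minimal sketch under that assumption (it shares a name with, but is unrelated to, the context-manager helper above):

import time

def timing(start):
    # sketch: print and return the wall-clock time elapsed since `start`
    elapsed = time.perf_counter() - start
    print('Elapsed: {:.2f} s'.format(elapsed))
    return elapsed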
Example 3
def checklike(challenge,procs,yL=500.0,lim=None,gproj=True,inject=False,A=5e-14,alpha=-2.0/3.0,debug=1,prange=None):
    """Load challenge data and compute the likelihood for a range of background amplitudes.
    Returns a two-column array of (A,logL)."""

    global resid_f,cgw,alphaab,times_f,gmat,meta,cpn,error_f

    meta,desi,times_f,resid_f,error_f = load(challenge,limit=lim)
    alphaab = alphamat(meta)

    with timing("Initial setup",1,debug):
        cgw  = Cgw_100ns(alphaab,times_f,alpha,fL=1.0/float(yL))
        cpn  = Cpn(error_f)

        if inject:
            resid_f = simulate(alphaab,times_f,cgw,cpn,A=A,n=1)

        if gproj:
            with timing("Timing for gmat setup",2,debug):
                if desi is None:
                    gmat = Gproj(times_f,len(meta))
                else:
                    print "Using tempo2 design matrix"
                    gmat = Gdesi2(desi,meta)        # gmat = Gdesi(desi,len(meta))

                resid_f = N.dot(gmat.T,resid_f)

                cgw = blockmul(cgw,gmat,meta)       # cgw = N.dot(gmat.T,N.dot(cgw,gmat))
                cpn = blockmul(cpn,gmat,meta)       # cpn = N.dot(gmat.T,N.dot(cpn,gmat))

    pool = multiprocessing.Pool(int(procs))

    if prange is None:
        x = N.linspace(1e-14,9e-14,20)              # range of A
    else:
        x = N.linspace(prange[0],prange[1],20)      # assigned range of A

    with timing("Total timing for {0} likelihoods".format(len(x)),1,debug):
        # l = pool.map(lnprob,[[x0] for x0 in x])
        l = pool.map(lnprob2,[[x0,alpha] for x0 in x])

    pool.close()
    pool.join()

    if debug is True or debug >= 2:
        print "Maximum %s found at par %s" % (N.max(l),x[N.argmax(l)])

    return N.array([x,l]).T
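
A hypothetical call, assuming a challenge dataset named 'open1' can be loaded; the maximum-likelihood amplitude is read off the returned (A, logL) array:

res = checklike('open1', procs=4, prange=(1e-14, 9e-14))
A_ml = res[N.argmax(res[:, 1]), 0]    # amplitude with the highest logL
print('Maximum-likelihood A: {0:.3g}'.format(A_ml))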
Example 4
def policy_iteration(mdp, threshold, discount):
    num_s = mdp.num_states
    num_a = mdp.num_actions
    r = mdp.get_rewards()
    p = mdp.get_probabilities()

    print('Policy iteration begins')

    v_curr = np.zeros((num_s, 1))
    policy_curr = np.zeros((num_s, 1))
    policy_next = np.full((num_s, 1), np.inf)

    t = time.perf_counter()
    while not np.array_equal(policy_curr, policy_next):
        distance = np.inf
        values = list()
        values.append((0, v_curr.reshape(num_s)))
        i = 1
        while distance >= threshold:
            v = r + discount * p @ v_curr

            split_values = np.split(v, num_s)
            # maximizing the Bellman eq. for all actions
            v_next = np.array(list(map(np.max, split_values)))
            v_next = v_next.reshape(num_s, 1)
            distance = np.linalg.norm(v_next - v_curr, ord=np.inf)
            values.append((i,
                           v_next.reshape(num_s),
                           distance))
            i += 1
            v_curr = v_next

        print(*values, sep='\n')

        # policy improvement
        last_v = r + discount * p @ v_curr
        split_last_values = np.split(last_v, num_s)
        # find actions that maximize the Bellman eq. (argmax)
        policy_curr = policy_next
        policy_next = np.array(list(map(np.argmax, split_last_values)))
        policy_next = policy_next.reshape(num_s, 1)
        print('policy iteration - policy: \n', policy_next)
    timing(t)

    return policy_next
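
A minimal sketch of driving policy_iteration with a toy problem; the ToyMDP container and its numbers are invented for illustration and only match the interface the function expects (rewards and transitions flattened over (state, action) pairs). It assumes a timing helper like the one sketched after Example 2 is in scope:

import numpy as np

class ToyMDP:
    # hypothetical 2-state, 2-action MDP exposing the expected interface
    num_states = 2
    num_actions = 2

    def get_rewards(self):
        # one reward per (state, action) pair, shape (num_states*num_actions, 1)
        return np.array([[0.0], [1.0], [0.0], [2.0]])

    def get_probabilities(self):
        # transition probabilities, shape (num_states*num_actions, num_states)
        return np.array([[1.0, 0.0],   # s0, a0 -> s0
                         [0.0, 1.0],   # s0, a1 -> s1
                         [1.0, 0.0],   # s1, a0 -> s0
                         [0.0, 1.0]])  # s1, a1 -> s1

policy = policy_iteration(ToyMDP(), threshold=1e-6, discount=0.9)
# expected policy: both states prefer action 1, i.e. [[1], [1]]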
Example 5
def emceehammer(challenge,procs=10,suffix=None,ndim=None,nwalkers=200,iters=100,limit=None,inject=False,resume=False,checkpoint=None):
    """Load challenge data and perform a single-parameter (A) emcee Hammer run on them.
    Save chain and probabilities to numpy arrays."""

    global resid_f,cgw,alphaab,times_f,gmat,meta,cpn,error_f

    meta,desi,times_f,resid_f,error_f = load(challenge,limit=limit)
    alphaab = alphamat(meta)

    if inject == 'raw':
        print "Loading clean data from raw challenge files"
        resid_f = loadraw(challenge,limit=None)

    if ndim is None and challenge in ['closed1','closed2','closed3']:
        ndim = 2*len(meta) + 2

    with timing("Initial setup"):
        cgw  = Cgw_100ns(alphaab,times_f,alpha=-2.0/3.0,fL=1.0/500)
        cpn  = Cpn(error_f)

        if challenge == 'open3':
            cpn = cpn +  Cred_100ns(alphaab,times_f,A=5.77e-22,alpha=1.7,fL=1.0/500)

        if inject == 'inject':
            print "Injecting synthetic signals at dataset times"
            resid_f = simulate(alphaab,times_f,cgw,cpn,A=5e-14,n=1)

        if desi is None:
            gmat = Gproj(times_f,len(meta))
        else:
            print "Using tempo2 design matrix"
            gmat = Gdesi2(desi,meta)        # gmat = Gdesi(desi,len(meta))

        resid_f = N.dot(gmat.T,resid_f)

        if ndim == 1:                       # otherwise the multiplication is done in logL
            cgw = blockmul(cgw,gmat,meta)   # cgw = N.dot(gmat.T,N.dot(cgw,gmat))
            cpn = blockmul(cpn,gmat,meta)   # cpn = N.dot(gmat.T,N.dot(cpn,gmat))

    if not N.all(N.isfinite(cgw)):
        raise ArithmeticError("cgw contains NaN or infinite entries")

    # multiprocessing seems to work better if nwalkers >> procs
    # also keep in mind that the ensemble is split in two...

    trueA, truealpha = 5e-14, -2.0/3.0
    trueAred, truealphared = 5.77e-22, 1.7

    if ndim == 1:
        # initial walker positions - a list of numpy arrays
        p0 = [[random.uniform(trueA*0.5,trueA*1.5)] for i in range(nwalkers)]

        sampler = emcee.EnsembleSampler(nwalkers,ndim,lnprob,args=[],threads=int(procs))
    elif ndim == 2:
        p0 = [[random.uniform(trueA*0.5,trueA*1.5),
               random.uniform(alpha_min,alpha_max)] for i in range(nwalkers)]

        sampler = emcee.EnsembleSampler(nwalkers,ndim,lnprob2,args=[],threads=int(procs))
    elif ndim == 4:
        p0 = [[random.uniform(trueA*0.5,trueA*1.5),
               random.uniform(alpha_min,alpha_max),
               random.uniform(trueAred*0.1,trueAred*10),
               random.uniform(alphared_min,alphared_max)] for i in range(nwalkers)]

        sampler = emcee.EnsembleSampler(nwalkers,ndim,lnprob4,args=[],threads=int(procs))
    elif ndim == 2*len(meta) + 2:
        p0 = [[random.uniform(trueA*0.5,trueA*1.5),random.uniform(alpha_min,alpha_max)] +
              [value for pulsar in meta
                     for value in [random.uniform(math.log10(trueAred*0.1),math.log10(trueAred*10)),random.uniform(alphared_min,alphared_max)]]
#                    for value in [random.uniform(trueAred*0.1,trueAred*10),random.uniform(alphared_min,alphared_max)]]
              for i in range(nwalkers)]

        sampler = emcee.EnsembleSampler(nwalkers,ndim,lnprob22Nlog,args=[],threads=int(procs))
        # sampler = emcee.EnsembleSampler(nwalkers,ndim,lnprob22N,args=[],threads=int(procs))
    elif ndim == 3*len(meta) + 2:
        p0 = [[random.uniform(trueA*0.5,trueA*1.5),random.uniform(alpha_min,alpha_max)] +
              [value for pulsar in meta
                     for value in [random.uniform(trueAred*0.1,trueAred*10),
                                   random.uniform(alphared_min,alphared_max),
                                   random.uniform(log10_efac_min,log10_efac_max)]]
              for i in range(nwalkers)]

        sampler = emcee.EnsembleSampler(nwalkers,ndim,lnprob23N,args=[],threads=int(procs))
    else:
        raise ValueError("unsupported number of dimensions: {0}".format(ndim))

    suffix = (suffix + '-' + str(ndim)) if suffix else str(ndim)

    resumefile = '../runs/resume-{0}-{1}.npy'.format(challenge,suffix)
    chainfile  = '../runs/chain-{0}-{1}.npy'.format(challenge,suffix)
    lnprobfile = '../runs/lnprob-{0}-{1}.npy'.format(challenge,suffix)

    if resume:
        p0 = N.load(resumefile)
        print "Resuming run from file", resumefile

    if checkpoint:
        for subrun in range(int(iters/checkpoint)):
            with timing("{0} x {1} samples (subrun {2})".format(checkpoint,nwalkers,subrun)):
                sampler.run_mcmc(p0,checkpoint)

            p0 = sampler.chain[:,-1,:]
            N.save(resumefile,p0)

            N.save(chainfile, sampler.chain)
            N.save(lnprobfile,sampler.lnprobability)
    else:
        with timing("{0} x {1} samples".format(iters,nwalkers)):
            sampler.run_mcmc(p0,iters)

        N.save(resumefile,sampler.chain[:,-1,:])

        N.save(chainfile, sampler.chain)
        N.save(lnprobfile,sampler.lnprobability)

    print "Done! Mean acceptance fraction:", N.mean(sampler.acceptance_fraction)
Example 6
def sample(pulsarfile='cJ0437-4715',pulsardir='.',suffix=None,outputdir='.',
           procs=1,fitpars=None,walkers=200,nsteps=100,ball=None,
           reseed=None,resume=False,useprefitvals=False,showml=False,improveml=False,efficiency='0.8',
           method='emcee',ntemps=1,writeparfile=False,dist=10.):
    global pulsar, multiplier, parameters, ranges, multipliers, priors, offsets, err, DMdist
    # evals, lapse

    DMdist = dist

    if method == 'multinest':
        from mpi4py import MPI
        import pymultinest

        printdebug = MPI.COMM_WORLD.Get_rank() == 0
    else:
        printdebug = True


    # find tempo2 files
    pulsarfile, parfile, timfile = sampleutils.findtempo2(pulsarfile,pulsardir=pulsardir,debug=printdebug)
#    parfile, timfile = '../eptadata/par/' + pulsarfile + '_EPTA_0.0.par', '../eptadata/tim/' + pulsarfile + '_EPTA_0.0.tim'
#    parfile, timfile = '../nanograv/par/' + pulsarfile + '_noPX.par', '../nanograv/tim/' + pulsarfile + '_NANOGrav_dfg+12.tim'
    whichpulsar = os.path.basename(pulsarfile)

    # initialize Cython proxy for tempo2 pulsar
    pulsar = T.tempopulsar(parfile,timfile)

    err = 1e-6 * pulsar.toaerrs

    # print "TOA errors: min {0:.2g} s, avg {1:.2g}, median {2:.2g}, max {3:.2g}".format(N.min(err),N.mean(err),N.median(err),N.max(err))

    # -- set up global lists/dicts of parameter names, offsets, ranges, priors

    # fitting parameters
    if fitpars:
        if fitpars[0] == '+':
            parameters = list(pulsar.pars) + fitpars[1:].split(',')
        else:
            parameters = fitpars.split(',')
    else:
        parameters = pulsar.pars

    if 'log10_Ared' in parameters or 'Ared' in parameters:
        setuprednoise()
    if 'log10_jitter' in parameters or 'jitter' in parameters:
        setupjitter()

    ndim = len(parameters)

    if printdebug:
        print "Fitting {0}/{1} parameters: {2}".format(ndim,pulsar.ndim,' '.join(parameters))

    meta = N.fromiter(((par,pulsar[par].val,pulsar[par].err,pulsar.prefit[par].val,pulsar.prefit[par].err)
                       if par in pulsar.allpars else (par,default[par],0.0,default[par],0.0)
                       for par in parameters),
                      dtype=[('name','a32'),('val','f16'),('err','f16'),('pval','f16'),('perr','f16')])

    # do it here, otherwise it will set the post-fit errors to zero
    for par in parameters:
        if par in pulsar.allpars:
            pulsar[par].fit = False

    if printdebug:
        print "Integrating over {0} parameters: {1}".format(pulsar.ndim,' '.join(pulsar.pars))

    if ball is None:
        ball = 1 if method == 'emcee' else 4

    for par in parameters:
        # start from best-fit and (1-sigma) least-squares error
        if par not in pulsar.allpars:
            center, error = N.longdouble(0), N.longdouble(0)
        elif useprefitvals:
            center, error = pulsar.prefit[par].val, pulsar.prefit[par].err
            if error == 0.0:
                error = pulsar[par].err
                if printdebug:
                    print "Warning: prefit error is zero for parameter {0}! Using post-fit error...".format(par)
        else:
            center, error = pulsar[par].val, pulsar[par].err

        if error == 0.0 and printdebug:
            print "Warning: error is zero for parameters {0}! (May be reset to prior.)".format(par)

        # offset parameters (currently F0 only) so that we handle them with sufficient precision
        offsets[par] = center if par in ['F0'] else 0.0

        # if an absolute range is not prescribed, derive it from the tempo2 best-fit and errors,
        # extending the latter by a prescribed or standard multiplier
        if par not in ranges:
            multiplier = multipliers[par] if par in multipliers else ball
            ranges[par] = ((center - offsets[par]) - multiplier*error, (center - offsets[par]) + multiplier*error)

        # make sure that ranges are compatible with prior ranges
        if par in priors and not hasattr(priors[par],'__call__'):
            offprior = priors[par][0] - offsets[par], priors[par][1] - offsets[par]

            if ranges[par][0] >= offprior[1] or ranges[par][1] <= offprior[0] or ranges[par][1] - ranges[par][0] == 0.0:
                # if the range is fully outside the prior, reset range to prior
                ranges[par] = offprior
            else:
                # otherwise, reset range to intersection of range and prior
                ranges[par] = max(ranges[par][0],offprior[0]), min(ranges[par][1],offprior[1])

        if printdebug:
            print "{0} range: [{1},{2}] + {3}".format(par,ranges[par][0],ranges[par][1],offsets[par])

    # -- main sampling setup and loop

    if method == 'emcee':
        # -- set up

        if reseed:
            # restart from the last step (do we double-count it then?)
            if ntemps > 1:
                data = N.load('{0}/chain-pt-{1}.npy'.format(outputdir,reseed))
                p0 = data[:,:,-1,:]
            else:
                data = N.load('{0}/chain-{1}.npy'.format(outputdir,reseed))
                p0 = [data[:,-1,:]]
        else:
            # initialize walkers in a Gaussian ball (rescaled by ranges)
            p0 = [[randomtuple() for i in range(walkers)] for j in range(ntemps)]

        p1 = [[randomtuple() for i in range(walkers)] for j in range(ntemps)]

        if ntemps > 1:
            sampler = emcee.PTSampler(ntemps,walkers,ndim,logL,logP,threads=int(procs))
        else:
            p0 = p0[0]  # only one temperature
            sampler = emcee.EnsembleSampler(walkers,ndim,logPL,threads=int(procs))

        # -- run!

        with timing("{0} x {1} (x {2} T) samples".format(nsteps,walkers,ntemps)):
            sampler.run_mcmc(p0,nsteps)

        print "Mean acceptance fraction:", N.mean(sampler.acceptance_fraction)

        # -- save everything

        filename = '{0}{1}-{2}.npy'.format(whichpulsar,'' if suffix is None else '-' + suffix,ndim)

        print
        print "Writing to files {0}/*-{1}".format(outputdir,filename)

        N.save('{0}/meta-{1}'.format(outputdir,filename),meta)

        if ntemps > 1:
            N.save('{0}/chain-pt-{1}'.format(outputdir,filename) ,sampler.chain)
            N.save('{0}/lnprob-pt-{1}'.format(outputdir,filename),sampler.lnprobability)

            N.save('{0}/chain-{1}'.format(outputdir,filename) ,sampler.chain[0,:,:,:])
            N.save('{0}/lnprob-{1}'.format(outputdir,filename),sampler.lnprobability[0,:,:])

            allpops, lnprobs = sampler.chain[0,:,-1,:], sampler.lnprobability[0,:,-1]

            lnZ, dlnZ = sampler.thermodynamic_integration_log_evidence(fburnin=0.1)
            print "Global (log) Evidence: %e +/- %e" % (lnZ, dlnZ)
        else:
            N.save('{0}/chain-{1}'.format(outputdir,filename) ,sampler.chain)
            N.save('{0}/lnprob-{1}'.format(outputdir,filename),sampler.lnprobability)

            allpops, lnprobs = sampler.chain[:,-1,:], sampler.lnprobability[:,-1]

        best = N.argmax(lnprobs)
        val_mode, logp_mode = allpops[best,:], lnprobs[best]
        # -- done
    elif method == 'multinest':
        outfile = '{0}/{1}{2}-'.format(outputdir,whichpulsar,'' if suffix is None else '-' + suffix)

        if efficiency[-1] == 'C' or efficiency[-1] == 'c':
            const_eff = True
            eff = float(efficiency[:-1])
        else:
            const_eff = False
            eff = float(efficiency)

        pymultinest.run(multilog,multiprior,ndim,
                        n_live_points=walkers,sampling_efficiency=eff,                            # 0.3/0.8 for evidence/parameter evaluation
                        #importance_nested_sampling = const_eff,const_efficiency_mode = const_eff, # possible with newer MultiNest
                        outputfiles_basename=outfile,resume=resume,verbose=True,init_MPI=False)   # if init_MPI=False, I should be able to use MPI in Python

        # if we're not the root MPI process, exit and let root do the statistics
        if MPI.COMM_WORLD.Get_rank() != 0:
            sys.exit(0)

        print " Writing to files {0}*".format(outfile)
        print

        for line in open('{0}stats.dat'.format(outfile),'r'):
            if "Global Evidence" in line:
                print line.strip('\n')
        print

        # save tempo2 fit information
        N.save('{0}meta.npy'.format(outfile),meta)

        # now let's have a look at the populations
        cloud = N.loadtxt('{0}post_equal_weights.dat'.format(outfile))

        allpops = cloud[:,:-1]
        lnprobs = cloud[:,-1]

        live = N.loadtxt('{0}phys_live.points'.format(outfile))

        best = N.argmax(live[:,-2])
        val_mode, logp_mode = live[best,:-2], live[best,-2]
    else:
        raise NotImplementedError("Unknown sampling method: " + method)

    # further optimize the mode

    if improveml:
        optimizer = Simplex.Simplex(lambda xs: -logPL(xs),val_mode,0.1*N.var(allpops[:,:],axis=0))
        print "Optimizing MAP..."
        minimum, error, iters = optimizer.minimize(maxiters=1000,monitor=1); print
        val_mode = N.array(minimum)

    # statistical analysis

    # print header
    maxlen = max(3,max(map(len,parameters)))

    print '-' * (101 + maxlen + 3)
    print "%*s | tempo2 fit parameters              | mcmc-fit parameters                | diff     | erat     bias" % (maxlen,'par')

    # loop over fitted parameters
    for i,par in enumerate(parameters):
        if useprefitvals:
            val_tempo, err_tempo = meta[i]['pval'], meta[i]['perr']
        else:
            val_tempo, err_tempo = meta[i]['val'], meta[i]['err']

        val_mcmc = (val_mode[i] if showml else N.mean(allpops[:,i])) + offsets[par]   # MCMC values/errors
        err_mcmc = math.sqrt(N.var(allpops[:,i]))                                     # use cond. var. also for ML est.

        if writeparfile and par in pulsar.allpars:
            pulsar[par].val = val_mcmc
            pulsar[par].err = err_mcmc

        try:
            with numpy_seterr(divide='ignore'):
                print ('%*s | %+24.*e ± %.1e | %+24.*e ± %.1e | %+.1e | %.1e %+.1e'
                       % (maxlen,par,                                                   # parameter name
                          precisiondigits(val_tempo,err_tempo),val_tempo,err_tempo,     # tempo2 value and error
                          precisiondigits(val_mcmc, err_mcmc ),val_mcmc, err_mcmc,      # MCMC value and error
                          val_mcmc - val_tempo,                                         # MCMC/tempo2 difference
                          err_mcmc/err_tempo,                                           # ratio of errors
                          (val_mcmc - val_tempo)/err_tempo))                            # difference in units of tempo2 error
        except Exception:
            print "Problem with values:", par, val_tempo, err_tempo, val_mcmc, err_mcmc

    print '-' * (101 + maxlen + 3)

    if writeparfile:
        parfilename = '{0}/{1}{2}-mcmc.par'.format(outputdir,whichpulsar,'' if suffix is None else '-' + suffix)
        pulsar.savepar(parfilename)
        print "Wrote new parfile to", parfilename

    val_tempo2 = [(par['pval'] if useprefitvals else par['val']) - offsets[par['name']] for par in meta]

    dof = pulsar.nobs - pulsar.ndim
    pmchisq = -2.0 * logL(val_mode) / dof
    try:
        pfchisq = -2.0 * logL(val_tempo2) / dof
    except Exception:
        pfchisq = 'NaN'

    print
    print "{0}-fit log L: {1}; post-mcmc (best fit) log L: {2}".format('Pre' if useprefitvals else 'Post',pfchisq,pmchisq)

    pmrms = rmsres(val_mode)
    pfrms = rmsres(val_tempo2)

    print "{0}-fit rms res.: {1}; post-mcmc rms res.: {2}".format('Pre' if useprefitvals else 'Post',pfrms,pmrms)
Example 7
    # fragment: EPISODES, SEQ_RUN, SEQ_STEPS, env_stock, file_name_stock,
    # IS_LOAD, rewards_total and t_start are defined earlier in the script
    samples_total = []
    for seq_ep in range(SEQ_STEPS if SEQ_RUN else EPISODES, EPISODES + 1,
                        SEQ_STEPS):
        game_rewards, ep_sample = play_dqn(env_stock,
                                           seq_ep,
                                           file_name_stock,
                                           load=IS_LOAD)
        samples_total.extend(ep_sample)

        # episode reward calculations
        game_reward_total = sum(game_rewards.values())
        game_reward_avg = game_reward_total / seq_ep
        print("\t Average reward per episode {:.2f}".format(game_reward_avg))

        rewards_total[seq_ep] = game_reward_total
    t_finish = timing(t_start)
    print('\n', '-' * 10, 'Summary', '-' * 10)

    with open('samples.csv', mode='w', newline='') as samples_handle:
        csv_writer = csv.writer(samples_handle, delimiter=',')

        csv_writer.writerows(samples_total)

    # total episodes reward calculations
    sum_rewards_total = sum(rewards_total.values())
    avg_rewards_total = sum_rewards_total / sum(rewards_total.keys())
    time_per_step = t_finish / sum_rewards_total
    print('*** Time per step: {:.4f} second(s)'.format(time_per_step))
    print('*** Sum of total rewards: {}'.format(sum_rewards_total))
    print('*** Average total reward: {:.2f}'.format(avg_rewards_total))
    '''-------Plotting-------'''