def preOpt(param, gpus, log):
    """Run a round of Newton steps on the sequences already on the GPUs.

    Uploads ``param.couplings`` as buffer 'J', computes bicounts and energies
    of the 'main' sequence buffers, writes a status report under the
    'preopt' name, and then calls ``NewtonSteps`` with the resulting
    bivariate marginals.

    Args:
        param: run parameters; reads ``couplings``, ``outdir``, ``alpha`` and
            ``bimarg`` (the target marginals), and is passed on to
            ``NewtonSteps``.
        gpus: GPU collection providing ``setBuf``, ``calcBicounts``,
            ``calcEnergies`` and ``collect``.
        log: callable taking a message string.

    Returns:
        ``(newJ, Jsteps)`` -- the two values returned by ``NewtonSteps``,
        in swapped order.
    """
    couplings, out_root, alphabet, target = (
        param.couplings, param.outdir, param.alpha, param.bimarg)

    log("Pre-Optimization:")

    # The "main" sequence buffers are assumed to be filled with sequences
    # by the time this function is called.
    gpus.setBuf('J', couplings)
    gpus.calcBicounts('main')
    gpus.calcEnergies('main')
    bicount, energies, seqs = gpus.collect(['bicount', 'E main', 'seq main'])

    # normalize the counts by the total of the first row of bicount
    counts_f32 = bicount.astype(np.float32)
    norm = np.float32(np.sum(bicount[0, :]))
    bimarg = counts_f32 / norm

    mkdir_p(os.path.join(param.outdir, 'preopt'))
    writeStatus('preopt', 0, target, bicount, bimarg, couplings, seqs,
                energies, alphabet, None, None, out_root, log)

    n_steps, updated_couplings = NewtonSteps('preopt', param, bimarg, gpus,
                                             log)
    return updated_couplings, n_steps
def newtonMCMC(param, gpus, start_run, log):
    """Drive the full Newton-MCMC fit: setup, optional pre-optimization, then
    one ``MCMCstep`` per run index.

    Args:
        param: run parameters. Reads (among others) ``couplings``, ``bimarg``,
            ``tempering``, ``nwalkers``, ``reg``/``regarg``, ``preopt``,
            ``seqs``, ``seedseq``, ``reseed``, ``newtonSteps``, ``mcmcsteps``,
            ``outdir``, ``alpha``, ``L``, ``q`` and ``seedmsa``. Sets
            ``max_newtonSteps`` and ``min_ssr``.
        gpus: GPU collection; used both as a whole (``gpus.setBuf`` ...) and
            as an iterable of individual GPUs when tempering is enabled.
        start_run: index of the first run to execute (runs
            ``start_run .. param.mcmcsteps - 1`` are performed).
        log: callable taking a message string.

    Raises:
        Exception: if the number of temperatures does not evenly divide the
            number of walkers, or if a reseed mode that draws from previous
            sequences is requested before any sequences are available.
    """
    J = param.couplings

    # copy target bimarg to gpus
    gpus.setBuf('bi target', param.bimarg)

    if param.tempering is not None:
        ntemps = len(param.tempering)
        if param.nwalkers % ntemps != 0:
            raise Exception("# of temperatures must evenly divide # walkers")
        B0 = np.max(param.tempering)
        # Floor division: the length handed to np.full must be an integer
        # (true division yields a float under Python 3 and raises TypeError).
        walkers_per_temp = param.nwalkers // ntemps
        Bs = np.concatenate([
            np.full(walkers_per_temp, b, dtype='f4')
            for b in param.tempering
        ])
        Bs = np.split(Bs, len(gpus))
        for B, gpu in zip(Bs, gpus):
            gpu.setBuf('Bs', B)
            gpu.markSeqs(B == B0)

    # set up regularization if needed
    if param.reg == 'X':
        gpus.setBuf('Creg', param.regarg)
    if param.reg == 'Xself':
        gpus.setBuf('Xlambdas', param.regarg)

    # pre-optimization
    Jsteps = 0
    if param.preopt:
        J, Jsteps = preOpt(param, gpus, log)
    else:
        log("No Pre-optimization")

    # do some setup for reseed options
    seqs = param.seqs
    seedseq = param.seedseq
    es = None  # energies of `seqs`; filled here or by the first MCMCstep
    if seqs is not None and param.reseed == 'single_best':
        # every other call site names the sequence buffer explicitly
        gpus.calcEnergies('main')
        es = gpus.collect('E main')

    param.max_newtonSteps = param.newtonSteps
    param.min_ssr = np.inf

    # solve using newton-MCMC
    Jstep = Jsteps
    name_fmt = 'run_{{:0{}d}}'.format(int(np.ceil(np.log10(param.mcmcsteps))))
    for i in range(start_run, param.mcmcsteps):
        runname = name_fmt.format(i)

        # determine seed sequence, if using seed
        seed = None
        if seedseq is not None:
            seed = seedseq
            seedseq = None  # only use provided seed in first round
        elif param.reseed == 'single_indep':
            seed = generateSequences('independent', param.L, param.q, 1,
                                     param.bimarg, log)[0]
        elif param.reseed == 'single_random':
            # choose random seed from the final sequences from last round
            if seqs is None:
                raise Exception("reseed 'single_random' requires sequences "
                                "from a previous round or param.seqs")
            # seqs is indexed per walker elsewhere (see 'single_best'), so
            # flatten a list of per-GPU arrays into one pool before drawing.
            if isinstance(seqs, np.ndarray):
                pool = seqs
            else:
                pool = np.concatenate(seqs)
            # NOTE(review): assumes randint has an exclusive upper bound
            # (numpy.random.randint semantics) -- confirm against the import.
            seed = pool[randint(0, pool.shape[0])]
        elif param.reseed == 'single_best':
            if seqs is None or es is None:
                raise Exception("reseed 'single_best' requires sequences "
                                "and energies from a previous round or "
                                "param.seqs")
            seed = seqs[np.argmin(es)]

        # fill sequence buffers (with seed or otherwise)
        mkdir_p(os.path.join(param.outdir, runname))
        if seed is not None:
            with open(os.path.join(param.outdir, runname, 'seedseq'),
                      'wt') as f:
                f.write("".join(param.alpha[c] for c in seed))
            gpus.fillSeqs(seed)
        elif param.reseed == 'independent':
            indep_seqs = generateSequences('independent', param.L, param.q,
                                           gpus.nwalkers, param.bimarg, log)
            gpus.setSeqs('main', indep_seqs)
        elif param.reseed == 'msa':
            gpus.setSeqs('main', param.seedmsa)

        Jstep, seqs, es, J = MCMCstep(runname, Jstep, J, param, gpus, log)
def MCMCstep(runName, Jstep, couplings, param, gpus, log):
    """Perform one MCMC sampling round followed by a batch of Newton steps.

    The couplings are re-gauged and saved, the walkers are equilibrated with
    ``runMCMC`` (or ``runMCMC_tempered`` when ``param.tempering`` is set),
    a status report is written, ``param.newtonSteps`` is tuned from the
    change in the sum of squared residuals (SSR) of the marginals, and
    ``NewtonSteps`` is run on the sampled marginals.

    Args:
        runName: name of this run; also the sub-directory of ``param.outdir``
            that receives the output files.
        Jstep: running total of coupling-update steps before this round.
        couplings: current couplings.
        param: run parameters. Reads and updates ``newtonSteps``,
            ``last_ssr`` and ``min_ssr``; ``last_ssr`` is read before it is
            written here, so it must already exist on entry.
        gpus: GPU collection.
        log: callable taking a message string.

    Returns:
        ``(Jstep + Jsteps, seqs, sampledenergies, newJ)`` where ``Jsteps`` and
        ``newJ`` come from ``NewtonSteps``, ``seqs`` is the collected
        'seq main' buffer and ``sampledenergies`` comes from the sampler.
    """
    outdir = param.outdir
    alpha, L, q = param.alpha, param.L, param.q
    bimarg_target = param.bimarg
    run_dir = os.path.join(outdir, runName)

    for header_line in ("",
                        "Gradient Descent step {}".format(runName),
                        "---------------------------",
                        "Total J update step {}".format(Jstep)):
        log(header_line)

    # Redistribute energy among the couplings. Not strictly required, but it
    # gives nicer output and may limit numerical inaccuracy; note that it
    # also shifts all sequence energies.
    log("(Re-zeroing gauge of couplings)")
    regauged = fieldlessGaugeEven(np.zeros((L, q)), couplings)
    couplings = regauged[1]

    mkdir_p(run_dir)
    np.save(os.path.join(run_dir, 'J'), couplings)

    # pick the sampler; it is expected to return six values
    if param.tempering is None:
        sampler = runMCMC
    else:
        sampler = runMCMC_tempered

    t_begin = time.time()

    log("Equilibrating MCMC chains...")
    sampled = sampler(gpus, couplings, runName, param, log)
    (bimarg_model, bicount, sampledenergies, e_rho, ptinfo,
     equilsteps) = sampled

    t_finish = time.time()

    # gather summary statistics and write them out
    seqs = gpus.collect('seq main')
    writeStatus(runName, Jstep, bimarg_target, bicount, bimarg_model,
                couplings, seqs, sampledenergies, alpha, e_rho, ptinfo, outdir,
                log)

    elapsed = t_finish - t_begin
    mc_rate = equilsteps * param.nsteps * np.float64(gpus.nwalkers) / elapsed
    log("Total MCMC running time: {:.1f} s    ({:.3g} MC/s)".format(
        elapsed, mc_rate))

    if param.tempering is not None:
        walker_es, walker_bs = gpus.collect(['E main', 'Bs'])
        np.save(os.path.join(run_dir, 'walker_Es'), walker_es)
        np.save(os.path.join(run_dir, 'walker_Bs'), walker_bs)

    # Tune the number of Newton steps depending on whether the SSR grew.
    ns_delta = param.newton_delta
    residual = bimarg_target - bimarg_model
    ssr = np.sum(residual**2)
    previous_ssr = param.last_ssr
    # Compare against the mean of the last and the minimum SSR so that some
    # increase from statistical fluctuation is tolerated, rather than
    # demanding a decrease every round.
    if previous_ssr is not None and ssr > (previous_ssr + param.min_ssr) / 2:
        # A factor of 2.0 would make back-steps equal to forward steps; 1.5
        # biases slightly towards more newtonsteps on average.
        reduced = param.newtonSteps - int(1.5 * ns_delta)
        param.newtonSteps = max(ns_delta, reduced)
        log("SSR increased over min. Decreasing newtonsteps to {}".format(
            param.newtonSteps))
    param.last_ssr = ssr
    param.min_ssr = min(ssr, param.min_ssr)

    Jsteps, newJ = NewtonSteps(runName, param, bimarg_model, gpus, log)
    param.newtonSteps = min(2048, Jsteps + ns_delta)
    log("Increasing newtonsteps to {}".format(param.newtonSteps))
    with open(os.path.join(run_dir, 'nsteps'), 'wt') as nsteps_file:
        nsteps_file.write(str(Jsteps))

    total_steps = Jstep + Jsteps
    return total_steps, seqs, sampledenergies, newJ
def runMCMC_tempered(gpus, couplings, runName, param, log):
    """Equilibrate the walkers with temperature swaps and summarize the
    walkers sitting at the highest listed B value.

    Every loop iteration runs ``gpu.runMCMC()`` on each GPU followed by a
    ``swapTemps`` call. The schedule depends on ``param.equiltime`` and
    ``param.trackequil``:

    * ``equiltime == 'auto'``: 8 warm-up loops without swaps, then rounds of
      doubling length until the Spearman p-values of the newest walker
      energies against those of the two preceding rounds both exceed 0.02.
    * integer ``equiltime`` with ``trackequil == 0``: exactly ``equiltime``
      loops, no tracking.
    * integer ``equiltime`` otherwise: ``equiltime // trackequil`` blocks of
      ``trackequil`` loops, recording energies and Bs after each block.

    Afterwards the walkers whose B equals ``max(param.tempering)`` are marked
    and stored to the large buffer, on which bicounts and energies are
    computed.

    Args:
        gpus: iterable of GPU objects (the small sequence buffer is assumed
            to be filled already).
        couplings: couplings uploaded to each GPU as buffer 'J'.
        runName: run sub-directory name under ``param.outdir``.
        param: run parameters (``equiltime``, ``trackequil``, ``outdir``,
            ``tempering``, ``nswaps``).
        log: callable taking a message string.

    Returns:
        ``(bimarg_model, bicount, sampledenergies, e_rho, (Bs, r), step)``,
        the same six-slot layout ``runMCMC`` returns. ``e_rho`` is ``None``
        when equilibration was not tracked; ``step`` is the loop count.

    Raises:
        Exception: if no swap round was performed, so that the per-GPU B
            values are unknown.
    """
    nloop = param.equiltime
    trackequil = param.trackequil
    outdir = param.outdir
    # assumes small sequence buffer is already filled

    B0 = np.max(param.tempering)

    #get ready for MCMC
    for gpu in gpus:
        gpu.setBuf('J', couplings)

    # Bs / r are produced by swapTemps; stay None if no swap round runs
    Bs, r = None, None

    #equilibration MCMC
    if nloop == 'auto':
        if trackequil != 0:
            equil_dir = os.path.join(outdir, runName, 'equilibration')
            mkdir_p(equil_dir)
        else:
            equil_dir = None

        loops = 8
        for i in range(loops):
            for gpu in gpus:
                gpu.runMCMC()
        step = loops

        equil_e = []
        while True:
            for i in range(loops):
                for gpu in gpus:
                    gpu.runMCMC()
                Bs, r = swapTemps(gpus, param.tempering, param.nswaps)

            step += loops
            energies, _ = track_main_bufs(param, gpus, equil_dir, step)
            equil_e.append(energies)
            # only write Bs when tracking is on: otherwise the equilibration
            # directory was never created and np.save would fail
            if equil_dir is not None:
                np.save(os.path.join(equil_dir, 'Bs_{}'.format(step)),
                        np.concatenate(Bs))

            if len(equil_e) >= 3:
                r1, p1 = spearmanr(equil_e[-1], equil_e[-2])
                r2, p2 = spearmanr(equil_e[-1], equil_e[-3])

                fmt = "({:.3f}, {:.2g})".format
                rstr = "Step {}, r={} prev:{}".format(step, fmt(r1, p1),
                                                      fmt(r2, p2))

                # Note that we are testing the correlation for *all* walkers,
                # no matter their temperature. In other words, we are waiting
                # for both the temperatures and energies to equilibrate - each
                # walker is expected to visit most temperatures during the
                # equilibration.
                if p1 > 0.02 and p2 > 0.02:
                    log(rstr + ". Equilibrated.")
                    break
            else:
                rstr = "Step {}".format(step)

            loops = loops * 2
            log(rstr + ". Continuing.")

        e_rho = [spearmanr(ei, equil_e[-1]) for ei in equil_e]
    elif trackequil == 0:
        #keep nloop iterator on outside to avoid filling queue with only 1 gpu
        for i in range(nloop):
            for gpu in gpus:
                gpu.runMCMC()
            Bs, r = swapTemps(gpus, param.tempering, param.nswaps)

        step = nloop
        e_rho = None  # nothing tracked in this mode
    else:
        #note: sync necessary with trackequil (may slightly affect performance)
        equil_dir = os.path.join(outdir, runName, 'equilibration')
        mkdir_p(equil_dir)
        equil_e = []
        for j in range(nloop // trackequil):
            for i in range(trackequil):
                for gpu in gpus:
                    gpu.runMCMC()
                Bs, r = swapTemps(gpus, param.tempering, param.nswaps)

            # same step label as runMCMC uses for its tracked blocks
            energies, _ = track_main_bufs(param, gpus, equil_dir,
                                          j * trackequil)
            np.save(os.path.join(equil_dir, 'Bs_{}'.format(j)),
                    np.concatenate(Bs))

            equil_e.append(energies)

        step = nloop
        # track how well different walkers are equilibrated. Should go to 0
        e_rho = [spearmanr(ei, equil_e[-1]) for ei in equil_e]

    if Bs is None:
        raise Exception("Tempered MCMC performed no temperature swaps; "
                        "equiltime must allow at least one loop")

    for B, gpu in zip(Bs, gpus):
        gpu.markSeqs(B == B0)
        gpu.clearLargeSeqs()
        gpu.storeMarkedSeqs()  #save seqs from smallbuf to largebuf

    #process results
    for gpu in gpus:
        gpu.calcBicounts('large')
        gpu.calcEnergies('large')
    res = readGPUbufs(['bicount', 'E large'], gpus)
    bicount = sumarr(res[0])
    # assert sum(bicount, axis=1) are all equal here
    bimarg_model = bicount.astype(np.float32) / np.float32(
        np.sum(bicount[0, :]))
    sampledenergies = np.concatenate(res[1])

    for gpu in gpus:
        gpu.logProfile()

    # six values, matching runMCMC and the unpacking done in MCMCstep
    return bimarg_model, bicount, sampledenergies, e_rho, (Bs, r), step
def runMCMC(gpus, couplings, runName, param, log):
    """Equilibrate the walkers (no tempering) and summarize the 'main' buffer.

    The schedule depends on ``param.equiltime`` and ``param.trackequil``:

    * ``equiltime == 'auto'``: 8 warm-up loops, then rounds of doubling
      length until the Spearman p-values of the newest walker energies
      against those of the two preceding rounds both exceed 0.02 and at
      least ``param.min_equil`` loops have run.
    * integer ``equiltime`` with ``trackequil == 0``: exactly ``equiltime``
      calls to ``gpus.runMCMC()``, no tracking.
    * integer ``equiltime`` otherwise: ``equiltime // trackequil`` blocks of
      ``trackequil`` loops, recording energies after each block.

    Args:
        gpus: GPU collection (the small sequence buffer is assumed to be
            filled already).
        couplings: couplings uploaded as buffer 'J'.
        runName: run sub-directory name under ``param.outdir``.
        param: run parameters (``equiltime``, ``trackequil``, ``outdir``,
            ``min_equil``).
        log: callable taking a message string.

    Returns:
        ``(bimarg_model, bicount, es, e_rho, None, step)``. ``bimarg_model``
        is float32; ``e_rho`` is ``None`` when equilibration was not tracked;
        the fifth slot is always ``None`` here; ``step`` is the loop count.
    """
    nloop = param.equiltime
    trackequil = param.trackequil
    outdir = param.outdir
    # assumes small sequence buffer is already filled

    #get ready for MCMC
    gpus.setBuf('J', couplings)

    #equilibration MCMC
    if nloop == 'auto':
        if trackequil != 0:
            equil_dir = os.path.join(outdir, runName, 'equilibration')
            mkdir_p(equil_dir)
        else:
            equil_dir = None

        loops = 8
        for i in range(loops):
            gpus.runMCMC()
        step = loops

        equil_e = []
        while True:
            for i in range(loops):
                gpus.runMCMC()

            step += loops
            energies, _ = track_main_bufs(param, gpus, equil_dir, step)
            equil_e.append(energies)

            rstr = "Step {} <E>={:.2f}. ".format(step, np.mean(energies))

            if len(equil_e) >= 3:
                r1, p1 = spearmanr(equil_e[-1], equil_e[-2])
                r2, p2 = spearmanr(equil_e[-1], equil_e[-3])

                fmt = "({:.3f}, {:.2g}) ".format
                rstr += "r={} prev:{}".format(fmt(r1, p1), fmt(r2, p2))

                if p1 > 0.02 and p2 > 0.02 and step >= param.min_equil:
                    log(rstr + "Equilibrated.")
                    break

            log(rstr + "Continuing.")
            loops = loops * 2

        e_rho = [spearmanr(ei, equil_e[-1]) for ei in equil_e]

    elif trackequil == 0:
        #keep nloop iterator on outside to avoid filling queue with only 1 gpu
        for i in range(nloop):
            gpus.runMCMC()

        step = nloop
        e_rho = None

    else:
        #note: sync necessary with trackequil (may slightly affect performance)
        equil_dir = os.path.join(outdir, runName, 'equilibration')
        mkdir_p(equil_dir)
        equil_e = []
        for j in range(nloop // trackequil):
            for i in range(trackequil):
                gpus.runMCMC()
            energies, _ = track_main_bufs(param, gpus, equil_dir,
                                          j * trackequil)
            equil_e.append(energies)

        step = nloop
        # track how well different walkers are equilibrated. Should go to 0
        e_rho = [spearmanr(ei, equil_e[-1]) for ei in equil_e]

    #process results
    gpus.calcBicounts('main')
    gpus.calcEnergies('main')
    bicount, es = gpus.collect(['bicount', 'E main'])
    # Normalize in float32, exactly as preOpt and runMCMC_tempered do, so the
    # marginals have the same dtype on every code path (and are never
    # produced by integer division).
    bimarg_model = bicount.astype(np.float32) / np.float32(
        np.sum(bicount[0, :]))

    gpus.logProfile()

    return bimarg_model, bicount, es, e_rho, None, step