import numpy as np
import pymc3 as pm

from lib.util import info  # assumed: the project's logging helper, as in the later examples


def optimize_parameters(model,
                        observed,
                        gamma_hyper=0,
                        pi_hyper=0,
                        rho_hyper=0,
                        start=None,
                        verbose=1,
                        **kwargs):
    n, g, a = observed.shape
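    # Update the model's shared hyperparameter variables in place before fitting.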
    model.gamma_hyper.set_value(gamma_hyper)
    model.pi_hyper.set_value(pi_hyper)
    model.rho_hyper.set_value(rho_hyper)
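    # Flatten samples x positions into rows so the observation matrix is (n * g, a).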
    model.observed.set_value(observed.reshape((-1, a)))
    i = 1
    while True:
        mapest, optim = pm.find_MAP(model=model,
                                    return_raw=True,
                                    maxeval=10000,
                                    start=start,
                                    progressbar=(verbose >= 2),
                                    **kwargs)
        # optim returns as None if it reaches maxeval or it gets a SIGKILL.
        if optim is not None:
            logp = model.logp(mapest)
            assert optim.success and np.isfinite(logp), \
                f'Optimization failed:\n{optim}'
            break

        start = mapest
        i += 1
        if verbose > 1:
            info(f'MAP estimate has not yet converged. '
                 f'Starting round {i} of gradient descent.')

    return mapest
Example #2
import xarray as xr

from lib.util import info  # assumed: the project's logging helper


def _load_input_data(allpaths):
    data = []
    for path in allpaths:
        info(path)
        d = xr.open_dataarray(path).squeeze()
        info(f"Shape: {d.sizes}.")
        data.append(d)
    info("Concatenating data from {} files.".format(len(data)))
    data = xr.concat(data, "library_id", fill_value=0)
    return data
Example #3
# -*- coding: utf-8 -*-
import os, xbmc, ctypes
from lib import util
from base import TTSBackendBase

DLL_PATH = os.path.join(
	xbmc.translatePath(util.info('path')).decode('utf-8'),
	'lib', 'backends', 'nvda', 'nvdaControllerClient32.dll')

try:
	from ctypes import windll
except ImportError:
	windll = None

class NVDATTSBackend(TTSBackendBase):
	provider = 'nvda'

	@staticmethod
	def available():
		if not windll:
			return False
		try:
			dll = ctypes.windll.LoadLibrary(DLL_PATH)
			res = dll.nvdaController_testIfRunning() == 0
			del dll
			return res
		except:
			return False

	def __init__(self):
		try:
			self.dll = ctypes.windll.LoadLibrary(DLL_PATH)
		except:
Example #4
import numpy as np
import pyro
import pyro.infer.autoguide.initialization
import pyro.optim
from tqdm import tqdm

from lib.util import info  # assumed: the project's logging helper


def find_map(
    model,
    init={},
    lag=10,
    stop_at=1.0,
    max_iter=int(1e5),
    learning_rate=1e-0,
    clip_norm=100.0,
):
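    # AutoLaplaceApproximation fits a point estimate during SVI, so sampling
    # from the trained guide recovers the MAP.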
    guide = pyro.infer.autoguide.AutoLaplaceApproximation(
        model,
        init_loc_fn=pyro.infer.autoguide.initialization.init_to_value(values=init),
    )

    svi = pyro.infer.SVI(
        model,
        guide,
        pyro.optim.Adamax(
            optim_args={"lr": learning_rate},
            clip_args={"clip_norm": clip_norm},
        ),
        loss=pyro.infer.JitTrace_ELBO(),
    )

    pyro.clear_param_store()
    pbar = tqdm(range(max_iter), position=0, leave=True)
    history = []
    try:
        for i in pbar:
            elbo = svi.step()

            if np.isnan(elbo):
                break

            # Fit tracking
            history.append(elbo)

            # Reporting/Breaking
            if i < 2:
                pbar.set_postfix({"ELBO": history[-1]})
            elif i < lag + 1:
                pbar.set_postfix(
                    {
                        "ELBO": history[-1],
                        "delta_1": history[-2] - history[-1],
                    }
                )
            else:
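                # Average per-step improvement in the objective over the trailing
                # `lag` iterations; stop once this gain falls below `stop_at`.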
                delta_lag = (history[-lag] - history[-1]) / lag
                pbar.set_postfix(
                    {
                        "ELBO": history[-1],
                        "delta_1": history[-2] - history[-1],
                        f"delta_{lag}": delta_lag,
                    }
                )
                if delta_lag < stop_at:
                    info("Optimization converged")
                    break
    except KeyboardInterrupt:
        info("Optimization interrupted")
    pbar.refresh()
    assert delta_lag < stop_at, (
        f"Reached {args.max_iter} iterations with a per-step improvement of "
        f"{args.delta_lag}. Consider setting --max-iter "
        f"or --stop-at larger; increasing --learning-rate may also help, "
        f"although it could also lead to numerical issues."
    )
    # Gather MAP from parameter-store
    mapest = {
        k: v.detach().cpu().numpy().squeeze()
        for k, v in pyro.infer.Predictive(
            model, guide=guide, num_samples=1
        )().items()
    }
    return mapest, np.array(history)
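

# A minimal usage sketch, not part of the original project: `toy_model` and its
# data are illustrative assumptions, included only to show how find_map() is
# meant to be called.
import pyro.distributions as dist
import torch


def toy_model():
    # One latent mean with a standard-normal prior, observed through noisy draws.
    obs = torch.tensor([1.0, 1.1, 0.9, 1.05, 0.95])
    mu = pyro.sample("mu", dist.Normal(0.0, 1.0))
    with pyro.plate("data", len(obs)):
        pyro.sample("x", dist.Normal(mu, 0.1), obs=obs)


mapest, history = find_map(
    toy_model,
    init={"mu": torch.tensor(1.0)},
    stop_at=0.01,
    learning_rate=0.01,
)
info(f"MAP estimate of mu: {float(mapest['mu']):.3f}")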
Example #5
    warnings.filterwarnings(
        "ignore",
        message="CUDA initialization: Found no NVIDIA",
        category=UserWarning,
        lineno=130,
    )
    warnings.filterwarnings(
        "ignore",
        message="torch.tensor results are registered as constants",
        category=torch.jit.TracerWarning,
        # module="trace_elbo",  # FIXME: What is the correct regex for module?
        lineno=95,
    )

    args = parse_args(sys.argv[1:])
    info(args)

    info(f"Setting random seed: {args.random_seed}")
    np.random.seed(args.random_seed)

    info("Loading input data.")
    data = _load_input_data(args.pileup)
    info(f"Full data shape: {data.sizes}.")

    info("Filtering positions.")
    informative_positions = select_informative_positions(
        data, args.incid_thresh
    )
    npos_available = len(informative_positions)
    info(
        f"Found {npos_available} informative positions with minor "
Example #6
        encoding=dict(tally=dict(zlib=True, complevel=compression_level)),
    ))


if __name__ == "__main__":
    inpath = sys.argv[1]  # 'data/ucfmt.sp-102506.gtpro-pileup.nc'
    minor_allele_thresh = float(sys.argv[2])  # 0.05
    position_thresh = float(sys.argv[3])  # 0.25
    npos_subsample = int(sys.argv[4])
    dist_thresh = float(sys.argv[5])  # 0.2
    clust_size_thresh = int(sys.argv[6])  # 3
    clust_pos_frac_thresh = float(sys.argv[7])  # 0.5
    pos_incid_thresh = float(sys.argv[8])  # 1.0
    outpath = sys.argv[9]  # 'data/ucfmt.sp-102506.gtpro-pileup.filt.nc'

    info(f"Loading data from `{inpath}`.")
    data = xr.open_dataset(inpath).tally.sum('read').squeeze()

    info(f"Identifying positions with "
         f">{minor_allele_thresh:.1%} minor allele incidence.")
    inf_positions = select_informative_positions(data, minor_allele_thresh)
    npos = len(inf_positions)
    info(f"Found {npos} informative positions.")

    info(f"Identifying libraries with observations at "
         f">{position_thresh:.1%} of informative positions.")
    frac_positions = (
        (data.sel(position=inf_positions) > 0)
        .any("allele")
        .mean("position")
    )
    high_cvrg_libs = xr_idxwhere(frac_positions > position_thresh)
    nlibs = len(high_cvrg_libs)
    info(f"Found {nlibs} high-coverage libraries.")
Example #7
    return pd.concat([r1, r2]).to_xarray().fillna(0).astype(int)


if __name__ == "__main__":
    warnings.filterwarnings(
        "ignore",
        category=FutureWarning,
        module="numpy.lib.arraysetops",
        lineno=580,
    )

    r1path = sys.argv[1]
    r2path = sys.argv[2]
    outpath = sys.argv[3]

    info("Loading r1")
    r1 = _load_read(r1path)
    info("Loading r2")
    r2 = _load_read(r2path)

    info("Concatenating reads")
    data = _concat_reads_to_xr(r1, r2)
    info("Summing r1 and r2")
    data = data.sum("read")
    info("Checking valid integer type")
    max_value = data.max()
    assert (
        max_value < np.iinfo(DEFAULT_INT_TYPE).max
    ), f"Largest count is bigger than maximum of dtype ({DEFAULT_INT_TYPE})"
    info(f"Casting to {DEFAULT_INT_TYPE}")
    data = data.astype(DEFAULT_INT_TYPE)
Example #8
#!/usr/bin/env python3

import pandas as pd
import gzip
from io import StringIO
import sys
from lib.util import info
import numpy as np

if __name__ == '__main__':
    snpdict_path = sys.argv[1]
    vcf_path = sys.argv[2]
    species_id = int(sys.argv[3])
    outpath = sys.argv[4]

    info("Scanning VCF for data start")
    # Count header rows prefixed by '##'.
    with gzip.open(vcf_path, mode='rt') as f:  # Assume gzipped input.
        for skiprows, line in enumerate(f):
            if line.startswith('##'):
                continue
            else:
                break
    info(f'Reading {vcf_path}')
    vcf = (
        pd.read_table(
            vcf_path,
            skiprows=skiprows,
            dtype={
                '#CHROM': str,
                'POS': int,
Example #9
        constant_fields = {}
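        # Remaining KEY=VALUE args become constant columns added to every row.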
        for arg in constant_args:
            k, v = arg.split("=")
            constant_fields[k] = v
        return cls(table_name, path, has_header, constant_fields)


if __name__ == "__main__":
    db_path, script_path, *input_args = sys.argv[1:]
    con = sqlite3.connect(db_path)
    with open(script_path) as f:
        con.executescript(f.read())

    for arg in input_args:
        db_input = DatabaseInput.from_arg(arg)
        info(db_input)
        template = pd.read_sql(f"SELECT * FROM {db_input.table_name} LIMIT 0",
                               con=con)
        columns = template.columns.to_list()

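        # Load the TSV with the destination table's column order, skipping the
        # file's own header row when it declares one.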
        d = pd.read_csv(
            db_input.path,
            sep="\t",
            skiprows={
                True: 1,
                False: 0
            }[db_input.has_header],
            names=columns,
        )
        d = d.assign(**db_input.constant_fields)
        info(d.info())
Example #10
#!/usr/bin/env python3

import xarray as xr
import sys
from lib.util import info

if __name__ == "__main__":
    outpath = sys.argv[1]

    data = []
    for path in sys.argv[2:]:
        data.append(xr.open_dataarray(path))
        info(data[-1].sizes)
    data = xr.concat(data, "library_id", fill_value=0)
    info(data.sizes)

    info(f'Writing to {outpath}')
    data.to_dataset(name="tally").to_netcdf(
        outpath, encoding=dict(tally=dict(zlib=True, complevel=6)))