def optimize_parameters(model, observed, gamma_hyper=0, pi_hyper=0, rho_hyper=0,
                        start=None, verbose=1, **kwargs):
    """Fit *model* to *observed* counts by MAP estimation, re-running
    pm.find_MAP from the previous estimate until the optimizer reports
    success and the log-probability is finite."""
    n, g, a = observed.shape
    model.gamma_hyper.set_value(gamma_hyper)
    model.pi_hyper.set_value(pi_hyper)
    model.rho_hyper.set_value(rho_hyper)
    model.observed.set_value(observed.reshape((-1, a)))
    i = 1
    while True:
        mapest, optim = pm.find_MAP(model=model, return_raw=True, maxeval=10000,
                                    start=start, progressbar=(verbose >= 2),
                                    **kwargs)
        # optim is returned as None if the optimizer reaches maxeval or gets a SIGKILL.
        if optim is not None:
            logp = model.logp(mapest)
            assert optim.success and np.isfinite(logp), \
                f'Optimization failed:\n{optim}'
            break
        start = mapest
        i += 1
        if verbose > 1:
            info(f'MAP estimate has not yet converged. '
                 f'Starting round {i} of gradient descent.')
    return mapest
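
# Usage sketch (not called anywhere in this project): optimize_parameters
# assumes the PyMC3 model exposes Theano shared variables as attributes
# (gamma_hyper, pi_hyper, rho_hyper, observed) so hyperparameters and data can
# be swapped without rebuilding the graph. The toy model below is invented
# purely to illustrate that shared-variable pattern.
def _demo_shared_variable_model():
    import numpy as np
    import pymc3 as pm
    import theano

    observed_shared = theano.shared(np.zeros(25))
    with pm.Model() as demo_model:
        mu = pm.Normal("mu", mu=0.0, sigma=10.0)
        pm.Normal("obs", mu=mu, sigma=1.0, observed=observed_shared)
    demo_model.observed = observed_shared  # Attach so callers can .set_value().

    # Swap in new data without recompiling, then fit as above.
    observed_shared.set_value(np.random.normal(3.0, 1.0, size=25))
    mapest, optim = pm.find_MAP(model=demo_model, return_raw=True)
    return mapest, optim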
def _load_input_data(allpaths):
    data = []
    for path in allpaths:
        info(path)
        d = xr.open_dataarray(path).squeeze()
        info(f"Shape: {d.sizes}.")
        data.append(d)
    info(f"Concatenating data from {len(data)} files.")
    data = xr.concat(data, "library_id", fill_value=0)
    return data
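
# Smoke-test sketch for _load_input_data (not called by the pipeline): writes
# two tiny single-library pileups to a temporary directory and concatenates
# them. Dimension names follow the (library_id, position, allele) convention
# used elsewhere here; the file names and values are invented.
def _demo_load_input_data():
    import os
    import tempfile
    import numpy as np

    tmpdir = tempfile.mkdtemp()
    demo_paths = []
    for lib in ["libA", "libB"]:
        tally = xr.DataArray(
            np.random.randint(0, 10, size=(1, 5, 4)),
            dims=["library_id", "position", "allele"],
            coords={"library_id": [lib]},
            name="tally",
        )
        path = os.path.join(tmpdir, f"{lib}.nc")
        tally.to_netcdf(path)
        demo_paths.append(path)

    combined = _load_input_data(demo_paths)
    assert combined.sizes["library_id"] == 2
    return combined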
# -*- coding: utf-8 -*-
import os, xbmc, ctypes
from lib import util
from base import TTSBackendBase

DLL_PATH = os.path.join(xbmc.translatePath(util.info('path')).decode('utf-8'), 'lib', 'backends', 'nvda', 'nvdaControllerClient32.dll')

try:
    from ctypes import windll
except ImportError:
    windll = None


class NVDATTSBackend(TTSBackendBase):
    provider = 'nvda'

    @staticmethod
    def available():
        if not windll:
            return False
        try:
            dll = ctypes.windll.LoadLibrary(DLL_PATH)
            res = dll.nvdaController_testIfRunning() == 0
            del dll
            return res
        except:
            return False

    def __init__(self):
        try:
            self.dll = ctypes.windll.LoadLibrary(DLL_PATH)
        except:
def find_map(
    model,
    init={},
    lag=10,
    stop_at=1.0,
    max_iter=int(1e5),
    learning_rate=1e-0,
    clip_norm=100.0,
):
    guide = pyro.infer.autoguide.AutoLaplaceApproximation(
        model,
        init_loc_fn=pyro.infer.autoguide.initialization.init_to_value(values=init),
    )
    svi = pyro.infer.SVI(
        model,
        guide,
        pyro.optim.Adamax(
            optim_args={"lr": learning_rate},
            clip_args={"clip_norm": clip_norm},
        ),
        loss=pyro.infer.JitTrace_ELBO(),
    )
    pyro.clear_param_store()

    pbar = tqdm(range(max_iter), position=0, leave=True)
    history = []
    delta_lag = np.inf  # Defined up front in case the loop exits before `lag` steps.
    try:
        for i in pbar:
            elbo = svi.step()
            if np.isnan(elbo):
                break

            # Fit tracking
            history.append(elbo)

            # Reporting/Breaking
            if i < 2:
                pbar.set_postfix({"ELBO": history[-1]})
            elif i < lag + 1:
                pbar.set_postfix(
                    {
                        "ELBO": history[-1],
                        "delta_1": history[-2] - history[-1],
                    }
                )
            else:
                delta_lag = (history[-lag] - history[-1]) / lag
                pbar.set_postfix(
                    {
                        "ELBO": history[-1],
                        "delta_1": history[-2] - history[-1],
                        f"delta_{lag}": delta_lag,
                    }
                )
                if delta_lag < stop_at:
                    info("Optimization converged")
                    break
    except KeyboardInterrupt:
        info("Optimization interrupted")
    pbar.refresh()
    assert delta_lag < stop_at, (
        f"Reached {max_iter} iterations with a per-step improvement of "
        f"{delta_lag}. Consider setting --max-iter "
        f"or --stop-at larger; increasing --learning-rate may also help, "
        f"although it could also lead to numerical issues."
    )
    # Gather the MAP estimate from the parameter store.
    mapest = {
        k: v.detach().cpu().numpy().squeeze()
        for k, v in pyro.infer.Predictive(
            model, guide=guide, num_samples=1
        )().items()
    }
    return mapest, np.array(history)
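
# Usage sketch for find_map (not called by the pipeline): a toy model that
# just recovers the mean of some fake observations. Everything below is
# invented for illustration; the real models are defined elsewhere in this
# project.
def _demo_find_map():
    import torch
    import pyro
    import pyro.distributions as dist

    obs = torch.randn(100) + 3.0  # Fake data centered near 3.

    def toy_model():
        mu = pyro.sample("mu", dist.Normal(0.0, 10.0))
        with pyro.plate("obs_plate", len(obs)):
            pyro.sample("obs", dist.Normal(mu, 1.0), obs=obs)

    mapest, elbo_history = find_map(
        toy_model, init={"mu": torch.tensor(0.0)}, stop_at=0.01
    )
    # mapest["mu"] should land near 3; elbo_history traces the loss per step.
    return mapest, elbo_history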
warnings.filterwarnings(
    "ignore",
    message="CUDA initialization: Found no NVIDIA",
    category=UserWarning,
    lineno=130,
)
warnings.filterwarnings(
    "ignore",
    message="torch.tensor results are registered as constants",
    category=torch.jit.TracerWarning,
    # module="trace_elbo",  # FIXME: What is the correct regex for module?
    lineno=95,
)

args = parse_args(sys.argv[1:])
info(args)

info(f"Setting random seed: {args.random_seed}")
np.random.seed(args.random_seed)

info("Loading input data.")
data = _load_input_data(args.pileup)
info(f"Full data shape: {data.sizes}.")

info("Filtering positions.")
informative_positions = select_informative_positions(
    data, args.incid_thresh
)
npos_available = len(informative_positions)
info(
    f"Found {npos_available} informative positions with minor "
        encoding=dict(tally=dict(zlib=True, complevel=compression_level)),
    ))


if __name__ == "__main__":
    inpath = sys.argv[1]  # 'data/ucfmt.sp-102506.gtpro-pileup.nc'
    minor_allele_thresh = float(sys.argv[2])  # 0.05
    position_thresh = float(sys.argv[3])  # 0.25
    npos_subsample = int(sys.argv[4])
    dist_thresh = float(sys.argv[5])  # 0.2
    clust_size_thresh = int(sys.argv[6])  # 3
    clust_pos_frac_thresh = float(sys.argv[7])  # 0.5
    pos_incid_thresh = float(sys.argv[8])  # 1.0
    outpath = sys.argv[9]  # 'data/ucfmt.sp-102506.gtpro-pileup.filt.nc'

    info(f"Loading data from `{inpath}`.")
    data = xr.open_dataset(inpath).tally.sum('read').squeeze()

    info(f"Identifying positions with "
         f">{minor_allele_thresh:.1%} minor allele incidence.")
    inf_positions = select_informative_positions(data, minor_allele_thresh)
    npos = len(inf_positions)
    info(f"Found {npos} informative positions.")

    info(f"Identifying libraries with observations at "
         f">{position_thresh:.1%} of informative positions.")
    frac_positions = (
        (data.sel(position=inf_positions) > 0)
        .any("allele")
        .mean("position")
    )
    high_cvrg_libs = xr_idxwhere(frac_positions > position_thresh)
    nlibs = len(high_cvrg_libs)
    info(f"Found {nlibs} high-coverage libraries.")
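
    # Note: xr_idxwhere (from this project's utilities) is used here as though
    # it returns the coordinate labels at which a 1-D boolean DataArray is
    # True; a minimal stand-in under that assumption would be:
    #
    #     def xr_idxwhere(cond):
    #         s = cond.to_series()
    #         return list(s[s].index)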
    return pd.concat([r1, r2]).to_xarray().fillna(0).astype(int)


if __name__ == "__main__":
    warnings.filterwarnings(
        "ignore",
        category=FutureWarning,
        module="numpy.lib.arraysetops",
        lineno=580,
    )

    r1path = sys.argv[1]
    r2path = sys.argv[2]
    outpath = sys.argv[3]

    info("Loading r1")
    r1 = _load_read(r1path)
    info("Loading r2")
    r2 = _load_read(r2path)

    info("Concatenating reads")
    data = _concat_reads_to_xr(r1, r2)
    info("Summing r1 and r2")
    data = data.sum("read")

    info("Checking valid integer type")
    max_value = data.max()
    assert (
        max_value < np.iinfo(DEFAULT_INT_TYPE).max
    ), f"Largest count is bigger than maximum of dtype ({DEFAULT_INT_TYPE})"
    info(f"Casting to {DEFAULT_INT_TYPE}")
    data = data.astype(DEFAULT_INT_TYPE)
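
    # For reference, the pd.concat(...).to_xarray().fillna(0) pattern at the
    # top of this file turns per-read indexed counts into a dense array where
    # positions covered by only one read become zero counts. A toy
    # illustration (index names and the keys= handling are invented; the real
    # per-read indexing comes from _load_read):
    #
    #     idx1 = pd.MultiIndex.from_tuples(
    #         [("lib1", 1), ("lib1", 2)], names=["library_id", "position"])
    #     idx2 = pd.MultiIndex.from_tuples(
    #         [("lib1", 2), ("lib1", 3)], names=["library_id", "position"])
    #     r1_demo = pd.Series([3, 1], index=idx1, name="tally")
    #     r2_demo = pd.Series([4, 2], index=idx2, name="tally")
    #     demo = pd.concat([r1_demo, r2_demo], keys=["r1", "r2"], names=["read"])
    #     demo_xr = demo.to_xarray().fillna(0).astype(int)
    #     demo_xr.sum("read")  # positions 1 and 3 each pick up a zero from fillna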
#!/usr/bin/env python3

import pandas as pd
import gzip
from io import StringIO
import sys
from lib.util import info
import numpy as np


if __name__ == '__main__':
    snpdict_path = sys.argv[1]
    vcf_path = sys.argv[2]
    species_id = int(sys.argv[3])
    outpath = sys.argv[4]

    info("Scanning VCF for data start")
    # Count header rows prefixed by '##'.
    with gzip.open(vcf_path, mode='rt') as f:  # Assume gzipped input.
        for skiprows, line in enumerate(f):
            if line.startswith('##'):
                continue
            else:
                break

    info(f'Reading {vcf_path}')
    vcf = (
        pd.read_table(
            vcf_path,
            skiprows=skiprows,
            dtype={
                '#CHROM': str,
                'POS': int,
        constant_fields = {}
        for arg in constant_args:
            k, v = arg.split("=")
            constant_fields[k] = v
        return cls(table_name, path, has_header, constant_fields)


if __name__ == "__main__":
    db_path, script_path, *input_args = sys.argv[1:]

    con = sqlite3.connect(db_path)

    with open(script_path) as f:
        con.executescript(f.read())

    for arg in input_args:
        db_input = DatabaseInput.from_arg(arg)
        info(db_input)
        template = pd.read_sql(
            f"SELECT * FROM {db_input.table_name} LIMIT 0", con=con
        )
        columns = template.columns.to_list()
        d = pd.read_csv(
            db_input.path,
            sep="\t",
            skiprows={True: 1, False: 0}[db_input.has_header],
            names=columns,
        )
        d = d.assign(**db_input.constant_fields)
        info(d.info())
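
        # Note on the LIMIT-0 query above: an empty result set still carries
        # the table's column names, which is what lets read_csv reuse the
        # schema defined in the SQL script. A standalone illustration (table
        # and columns invented):
        #
        #     demo_con = sqlite3.connect(":memory:")
        #     demo_con.execute("CREATE TABLE sample (sample_id TEXT, subject_id TEXT)")
        #     cols = pd.read_sql("SELECT * FROM sample LIMIT 0", con=demo_con)
        #     assert cols.columns.to_list() == ["sample_id", "subject_id"]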
#!/usr/bin/env python3

import xarray as xr
import sys
from lib.util import info


if __name__ == "__main__":
    outpath = sys.argv[1]

    data = []
    for path in sys.argv[2:]:
        data.append(xr.open_dataarray(path))
        info(data[-1].sizes)

    data = xr.concat(data, "library_id", fill_value=0)
    info(data.sizes)

    info(f'Writing to {outpath}')
    data.to_dataset(name="tally").to_netcdf(
        outpath,
        encoding=dict(tally=dict(zlib=True, complevel=6)),
    )
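
# Example invocation (script and file names invented for illustration):
#
#     python concatenate_pileups.py merged.pileup.nc libA.pileup.nc libB.pileup.nc
#
# The combined tally can then be inspected with, e.g.:
#
#     xr.open_dataset("merged.pileup.nc").tally.sizes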