def load_conf(self, args):
    """Set ``self.config`` from an explicit config path or from the defaults.

    ``args`` is first handed to ``self._collapse_arg`` together with the
    attribute name ``"cfpath"``. Afterwards, when ``args.cfpath`` is truthy,
    the path is resolved and loaded through ``conf.load_conf``; otherwise
    ``conf.default_conf()`` supplies the configuration.
    """
    self._collapse_arg(args, "cfpath")

    cfpath = args.cfpath
    if not cfpath:
        # No explicit path was given: use the default configuration.
        self.config = conf.default_conf()
        return

    resolved_path = Path(cfpath).resolve()
    self.config = conf.load_conf(resolved_path)
print("Should use Python >= 3.6") sys.exit() cmt.ARGS = args.parse_arguments(sys.argv[1:]) if cmt.ARGS["version"]: print(cmt.VERSION) sys.exit() err = args.get_invalid_modules_in_args() if len(err) > 0: print("ERR - Unknow module(s) : " + ','.join(err)) sys.exit() # conf.yml, conf.d/*.yml cmt.CONF = conf.load_conf() # if cron mode, introduce a small pause (offset) to spread the load on metrology servers if cmt.ARGS['cron']: mypause = conf.get_startoffset() time.sleep(mypause) # Persist cmt.PERSIST = persist.Persist(file=cmt.DEFAULT_PERSIST_FILE) if cmt.ARGS["nopersist"]: cmt.PERSIST.dict = {} lastrun = cmt.PERSIST.get_key("cmt_last_run", 0) # remote conf (url) or cached conf conf.load_conf_remote(cmt.CONF)
from src import american_community_survey as amc
from src import utils
from src import download_spark

## START

# Build the argument parser and parse the command line.
args = utils.get_argparser().parse_args()

utils.printNowToFile("starting:")

utils.printNowToFile("downloading spark")
download_spark.download(os.getcwd())

###############################################################

# Use the host/port pair from the command line when both are set;
# otherwise fall back to the default loader.
spark = (
    conf.load_conf(args.host, args.port)
    if args.host and args.port
    else conf.load_conf_default()
)

# Hand ridge_regression.py to the Spark context via addPyFile, then import
# the module locally as well.
spark.sparkContext.addPyFile('ridge_regression.py')
import ridge_regression as rr

## PREPROCESSING: CLEANING

## path to dataset
DATA_PATH = './dataset'
df = amc.load_dataset(DATA_PATH, spark)

###############################################################

## PREPROCESSING: FEATURES ENGINEERING

# name of the target column and remove all the rows where 'PINCP' is null
#!/usr/bin/env python3
"""Evaluate variant call predictions against GiaB truth
"""

import sys
import tempfile
import os
import shlex
import shutil
import subprocess
#import gzip
#import glob
#import io

from conf import load_conf

CONF = load_conf()
# Fail early at import time if any configured reference file is missing.
assert os.path.exists(CONF['highconf_regions'])
for f in CONF['truth_vcf'].values():
    assert os.path.exists(f)


def num_vars_from_vcf(f):
    """Return the number of variant records in VCF file ``f``.

    The count is produced by a shell pipeline: ``awk`` drops records whose
    10th column starts with ``0<any char>0:`` (e.g. ``0/0:`` or ``0|0:``),
    ``zgrep -v '^#'`` drops header lines, and ``wc -l`` counts what is left.

    Args:
        f: path of the VCF file to count.

    Returns:
        The line count reported by the pipeline, as an ``int``.

    Raises:
        subprocess.CalledProcessError: if the pipeline exits non-zero.
        ValueError: if the pipeline output is not a plain integer (stderr is
            merged into stdout, so any tool warning ends up in the output).
    """
    #cmd = "zgrep -vc '^#' {}".format(f) fails on empty files
    # ignore homozygous-reference genotypes, which can come from samples
    # extracted from joint calls
    # The path is shell-quoted so that spaces or shell metacharacters in the
    # file name cannot break (or inject into) the command line.
    cmd = "awk '$10 !~ /^0.0:/' {} | zgrep -v '^#' | wc -l".format(
        shlex.quote(str(f)))
    res = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
    return int(res.decode().strip())


def main(vartype, predvcf, truthvcf):
    """main function"""