def main():
    options = parse_args()
    mode = options.mode
    njobs = options.ncores

    # setup jobs
    with open(options.config, 'r') as f:
        cfg = yaml.full_load(f)

    # group jobs
    files = cfg["files"]
    if options.nfiles > 0:
        files = files[:options.nfiles]
    if mode in ["multiprocessing"] or njobs < 0:
        njobs = len(files)
    grouped_files = [list(x) for x in np.array_split(files, njobs)]
    tasks = [
        {"task": df_skim, "args": (fs, cfg, options.output.format(idx)), "kwargs": {}}
        for idx, fs in enumerate(grouped_files)
    ]

    if mode == "multiprocessing" and options.ncores == 0:
        results = pysge.local_submit(tasks)
    elif mode == "multiprocessing":
        results = pysge.mp_submit(tasks, ncores=options.ncores)
    elif mode == "sge":
        results = pysge.sge_submit(
            "zdb", "_ccsp_temp/", tasks=tasks, options=options.sge_opts,
            sleep=5, request_resubmission_options=True,
        )
    print("Finished!")
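# The snippet above follows a recurring pattern: split the configured file list
# into `njobs` roughly equal groups with np.array_split, then wrap each group in
# a task dict for pysge. A minimal, self-contained sketch of just that grouping
# step; the file names and the `fake_skim` stand-in for df_skim are invented
# for illustration:
import numpy as np

def fake_skim(files, cfg, output):  # hypothetical stand-in for df_skim
    return len(files)

files = ["a.root", "b.root", "c.root", "d.root", "e.root"]
njobs = 2
grouped_files = [list(x) for x in np.array_split(files, njobs)]
tasks = [
    {"task": fake_skim, "args": (fs, {}, "out_{}.h5".format(idx)), "kwargs": {}}
    for idx, fs in enumerate(grouped_files)
]
# grouped_files == [['a.root', 'b.root', 'c.root'], ['d.root', 'e.root']]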
def connections():
    if request.method == 'POST':
        # Connection page wants something
        act = request.form['action']
        if act == 'add':
            # First page of adding Connection
            return render_template('pages/connections-add.html', action=act)
        if act == 'add2':
            # Second page of adding Connection
            mark = request.form['market']
            if mark == 'crypto':
                ex = ccxt.exchanges
                return render_template('pages/connections-add.html', action=act,
                                       market=mark, exch=ex, len=len(ex))
            if mark == 'forex':
                return render_template('pages/connections-add.html', action=act, market=mark)
        if act == 'fin':
            # Setup of exchange has finished, create the connection
            ex = request.form['exchSel']
            market = request.form['market']
            if market == 'crypto':
                do.createCryptoCon(ex)
            return redirect("/connections")
        if act == 'info':
            # Create temp exchange instance based on post data
            ex = request.form['ex']
            return do.createCryptoInfo(ex)
        if act == 'fullinfo':
            con = request.form['con']
            # Create pathname and load connection config
            cfname = confPath + 'conn' + os.path.sep + con + '.yml'
            with open(cfname, 'r') as file:
                cfdata = yaml.full_load(file)
            # Create table in html
            cftable = "<table>"
            for key in cfdata:
                cftable = cftable + "<tr><th>" + str(key) + "</th><td>" + str(cfdata[key]) + "</td></tr>"
            cftable = cftable + "</table>"
            return cftable
        if act == 'delete':
            # Delete connection
            flash('Connection Deleted!', 'important')
            # Delete file
            delfile = confPath + 'conn' + os.path.sep + request.form['con'] + '.yml'
            os.remove(delfile)
            return redirect("/connections")
    else:
        connections = do.allCfgs('conn')
        return render_template('pages/connections.html', connections=connections)
def run(self):
    self.logger.info("Reading input file: {}".format(self.input_file))
    with open(self.input_file, "r") as inputfile:
        data = yaml.full_load(inputfile.read())

    reads = {}
    for sample, units in data['samples'].items():
        reads[sample] = {'R1': [], 'R2': []}
        for unit in units:
            for f in data['units'][unit]:
                reads[sample][self.get_read_pair(f)].append(f)

    new_samples = {}
    if self.config:
        self.logger.info("Skipping merge: --config_only mode activated")
    for s, pairs in reads.items():
        cmd = ['cat']
        for p in pairs['R1']:
            cmd.append(p)
        cmd.append('>' + os.path.join(self.folder, '{}_R1.fastq.gz'.format(s)))
        if not self.config:
            self.logger.info("Running merge command: {}".format(cmd))
            subprocess.run(' '.join(cmd), shell=True)

        cmd = ['cat']
        for p in pairs['R2']:
            cmd.append(p)
        cmd.append('>' + os.path.join(self.folder, '{}_R2.fastq.gz'.format(s)))
        if not self.config:
            if self.paired:
                self.logger.info("Paired Reads mode activated: merging R2 reads.")
                self.logger.info("Running R2 merge command: {}".format(cmd))
                subprocess.run(' '.join(cmd), shell=True)

        workdir = os.getcwd()
        new_samples[s] = os.path.join(workdir, self.folder, '{}_R1.fastq.gz'.format(s))

    yaml_template = 'config.template.yaml'
    with open(yaml_template, "r") as inputfile:
        new_data = yaml.full_load(inputfile.read())
    new_data['samples'] = new_samples

    yaml_project = 'config.project.{}.yaml'.format(self.project)
    if not self.merge:
        self.logger.info("Writing configfile: {}".format(yaml_project))
        with open(yaml_project, "w") as outfile:
            yaml.dump(new_data, outfile, indent=4)
    if self.merge:
        self.logger.info("Skipping configfile generation: --merge_only mode activated")
def createANN(self, nugget, nom, testsplit, scaler, scarcity, inputlayerunits,
              hiddenlayers, hiddenlayerunits, optimizer, loss, metrics,
              batchsize, epoch):
    # Create ANN YAML
    id = nom.lower()
    annYML = 'id: ' + id + "\n"
    annYML = annYML + 'name: ' + nom + "\n"
    annYML = annYML + 'nugget: ' + nugget + "\n"
    annYML = annYML + 'training: True' + "\n"
    annYML = annYML + 'scaler: ' + scaler + "\n"
    # print(scarcity, file=sys.stderr)
    if scarcity == "on":
        annYML = annYML + 'scarcity: True' + "\n"
    else:
        annYML = annYML + 'scarcity: False' + "\n"
    annYML = annYML + 'testsplit: ' + testsplit + "\n"
    # ANN Layers
    annYML = annYML + 'inputlayerunits: ' + inputlayerunits + "\n"
    annYML = annYML + 'hiddenlayers: ' + hiddenlayers + "\n"
    annYML = annYML + 'hiddenlayerunits: ' + hiddenlayerunits + "\n"
    # Fitting
    annYML = annYML + 'optimizer: ' + optimizer + "\n"
    annYML = annYML + 'loss: ' + loss + "\n"
    annYML = annYML + 'metrics: ' + metrics + "\n"
    annYML = annYML + 'batchsize: ' + batchsize + "\n"
    annYML = annYML + 'epoch: ' + epoch + "\n"
    # Training
    annYML = annYML + 'lasttrain: 0' + "\n"
    annYML = annYML + 'trainaccuracy: 0' + "\n"
    annYML = annYML + 'testaccuracy: 0' + "\n"
    # Add Nugget Info
    nfile = self.nuggetDataPath + nugget + '.pkl'
    # df = pd.read_feather(nfile)
    info = self.nugInfo(nfile)
    # Add info from nuggetinfo and enrichments
    annYML = annYML + 'symb: ' + info['symb'] + "\n"
    annYML = annYML + 'timeframe: ' + info['timeframe'] + "\n"
    annYML = annYML + 'from: ' + str(info['from']) + "\n"
    annYML = annYML + 'to: ' + str(info['to']) + "\n"
    annYML = annYML + 'depen: ' + info['depen'] + "\n"
    # indis = list(df.columns[0].values.tolist())
    with open(self.enConfPath + info['indi'] + '.yml', 'r') as afile:
        indi = yaml.full_load(afile)
    # print(indi['riches'], file=sys.stderr)
    annYML = annYML + 'indi: ' + indi['riches'] + "\n"
    # Delete empty lines
    annYML = os.linesep.join([s for s in annYML.splitlines() if s])
    # Save to YAML file
    self.writeRawCfgFile('aiann', id, annYML)
def read_menu_from_github(conf, save=False):
    """Read master.yaml from GitHub"""
    token = os.getenv("GITHUB_TOKEN")
    print(token)
    g = github.Github(token)
    repo = g.get_repo(conf.git_remote)
    try:
        master = repo.get_contents("master.yaml")
    except github.GithubException:
        master = repo.get_contents("master.yaml")
    master_dict = yaml.full_load(master.decoded_content)
    if save:
        save_yaml(master_dict, config.reference_dir)
    else:
        return master_dict
def multi_skim(
    configs, mode='multiprocessing', ncores=0, nfiles=-1, batch_opts="",
    outputs=None, chunksize=250000,
):
    all_tasks = []
    for config, output in zip(configs, outputs):
        outdir = os.path.dirname(output)
        if not os.path.exists(outdir):
            os.makedirs(outdir)

        njobs = ncores
        # setup jobs
        with open(config, 'r') as f:
            cfg = yaml.full_load(f)

        # group jobs
        files = cfg["files"]
        if nfiles > 0:
            files = files[:nfiles]
        if mode in ["multiprocessing"] or njobs < 0:
            njobs = len(files)
        grouped_files = [list(x) for x in np.array_split(files, njobs)]
        tasks = [{
            "task": job,
            "args": (fs, copy.deepcopy(cfg), output.format(idx)),
            "kwargs": {"chunksize": chunksize},
        } for idx, fs in enumerate(grouped_files)]
        all_tasks.extend(tasks)

    submit_tasks(all_tasks, mode, ncores, batch_opts)
    print("Finished!")
def readCfgFile(self, oftype, nom):
    fname = self.confPath + oftype + os.path.sep + nom
    with open(fname, 'r') as file:
        output = yaml.full_load(file)
    return output
def read_yaml(file_path):
    """Simple function to read yaml file"""
    with open(file_path) as yml:
        dict_ = yaml.full_load(yml)
    return dict_
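# A minimal usage sketch for read_yaml above. It assumes PyYAML is imported as
# `yaml` and that read_yaml from the previous snippet is in scope; example.yml
# and its keys are invented for illustration.
with open("example.yml", "w") as f:
    f.write("name: demo\nretries: 3\n")

cfg = read_yaml("example.yml")
print(cfg["name"], cfg["retries"])  # -> demo 3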
modes_module_path = os.path.join(exptool_package_path, "modes.py")
modes_module_spec = importlib.util.spec_from_file_location(
    "acconeer.exptool.modes", modes_module_path)
modes_module = importlib.util.module_from_spec(modes_module_spec)
modes_module_spec.loader.exec_module(modes_module)

parser = argparse.ArgumentParser()
parser.add_argument("input_filename")
args = parser.parse_args()

in_fn = args.input_filename
out_fn = os.path.join(exptool_package_path, "data", "regmap.yaml")

assert os.path.exists(os.path.dirname(out_fn))

with open(in_fn, "r") as in_f:
    d = yaml.full_load(in_f)


def clean(d):
    to_pop = []
    for k, v in d.items():
        if k == "description":
            to_pop.append(k)
            continue
        if type(v) == dict:
            if v.get("internal", False):
                to_pop.append(k)
                continue
2] #"https://raw.githubusercontent.com/BBerastegui/fresh-dns-servers/master/resolvers.txt" print('downloading data...') r = requests.get(URL) print('processing data...') response_text = r.text new_ips = [] for txt in response_text.splitlines(): new_ips.append(txt) print('read yaml file...') yaml_dict = {} with open(INPUT_FILE_NAME) as file: documents = yaml.full_load(file) documents['resolvers'] = new_ips print('write yaml file with processed data...') with open(INPUT_FILE_NAME, 'w') as file: d = yaml.dump(documents, file, indent=4) old_string = '- ' new_string = ' - ' # Safely read the input filename using 'with' with open(INPUT_FILE_NAME) as f: s = f.read() # Safely write the changed content, if found in the file with open(INPUT_FILE_NAME, 'w') as f:
log.info("Reading IP and port of {} from env vars".format(cc_db_var_name_vimemu))
env_vars = os.environ.keys()
cc_db_ip = os.getenv([
    var for var in env_vars
    if (cc_db_var_name_k8s in var and cc_db_host_k8s in var)
][0])
if not cc_db_ip:
    cc_db_ip = os.getenv(cc_db_var_name_vimemu + "_ip", "localhost")
cc_db_port = os.getenv([
    var for var in env_vars
    if (cc_db_var_name_k8s in var and cc_db_port_k8s in var)
][0])
if not cc_db_port:
    cc_db_port = os.getenv(cc_db_var_name_vimemu + "_port", "9090")
cc_db_url = "http://{}:{}".format(cc_db_ip, cc_db_port)

# use that to replace the URL in the configuration
datasource_path = "/etc/grafana/provisioning/datasources/datasource.yml"
log.info("Updating configuration in {} accordingly".format(datasource_path))
f_r = open(datasource_path, "r")
ds = yaml.full_load(f_r)
ds['datasources'][0]['url'] = cc_db_url
f_r.close()

f_w = open(datasource_path, "w")
log.debug("Replacing datasource URL")
f_w.write(yaml.safe_dump(ds))
f_w.close()

log.info("Done updating Grafana configurations")
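# Note that the env-var lookup above indexes [0] into the filtered list, which
# raises IndexError when no Kubernetes-injected variable matches, so the
# vim-emu fallback can never be reached. A hedged alternative sketch; the
# filter names mirror the snippet, but their values and the fallback variable
# name are invented for illustration:
import os

cc_db_var_name_k8s = "CC_DB"
cc_db_host_k8s = "SERVICE_HOST"

matching = [
    var for var in os.environ
    if cc_db_var_name_k8s in var and cc_db_host_k8s in var
]
# fall back to a vim-emu style variable (or localhost) when nothing matches
cc_db_ip = os.getenv(matching[0]) if matching else None
if not cc_db_ip:
    cc_db_ip = os.getenv("cc_db_vimemu_ip", "localhost")
print(cc_db_ip)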
def analyse(
    config, mode="multiprocessing", ncores=0, nfiles=-1, batch_opts="",
    output=None, chunksize=500000, merge_opts={},
):
    if output is not None and len(output.split(":")) != 2:
        raise ValueError(
            "The output kwarg should be None or a string with the format "
            "'{file_name}:{table_name}' instead of " + "{}".format(output))

    njobs = ncores
    # setup jobs
    with open(config, 'r') as f:
        cfg = yaml.full_load(f)

    # group jobs
    files = cfg["files"]
    if nfiles > 0:
        files = files[:nfiles]
    if mode in ["multiprocessing"] or njobs < 0:
        njobs = len(files)
    grouped_files = [list(x) for x in np.array_split(files, njobs)]
    tasks = [{
        "task": df_process,
        "args": (fs, cfg["query"]),
        "kwargs": {"chunksize": chunksize},
    } for fs in grouped_files]

    results = submit_tasks(tasks, mode=mode, ncores=ncores, batch_opts=batch_opts)

    if mode == 'multiprocessing':
        df = functools.reduce(lambda x, y: df_merge(x, y), results)
    else:
        # grouped multi-merge
        merge_njobs = merge_opts.get("ncores", 100)
        grouped_merges = [list(x) for x in np.array_split(results, merge_njobs)]
        tasks = [{
            "task": df_open_merge,
            "args": (r,),
            "kwargs": {},
        } for r in grouped_merges]

        merge_mode = merge_opts.get("mode", "multiprocessing")
        if merge_mode == "multiprocessing" and ncores == 0:
            semimerged_results = pysge.local_submit(tasks)
            df = functools.reduce(lambda x, y: df_merge(x, y), semimerged_results)
        elif merge_mode == "multiprocessing":
            semimerged_results = pysge.mp_submit(tasks, ncores=ncores)
            df = functools.reduce(lambda x, y: df_merge(x, y), semimerged_results)
        elif merge_mode == "sge":
            semimerged_results = pysge.sge_submit(
                tasks, "zdb-merge", "_ccsp_temp",
                options=merge_opts.get("batch_opts", "-q hep.q"),
                sleep=5, request_resubmission_options=True, return_files=True,
            )
            df = df_open_merge(semimerged_results)

    if output is not None:
        path, table = output.split(":")
        df.to_hdf(
            path, table, format='table', append=False,
            complevel=9, complib='zlib',
        )
    else:
        return df
def multi_analyse(
    configs, mode="multiprocessing", ncores=0, nfiles=-1, batch_opts="",
    outputs=None, chunksize=500000, merge_opts={},
):
    for output in outputs:
        if output is not None and len(output.split(":")) != 2:
            raise ValueError(
                "The output kwarg should be None or a string with the format "
                "'{file_name}:{table_name}' instead of " + "{}".format(output))

    all_tasks, sizes = [], []
    for config in configs:
        njobs = ncores
        # setup jobs
        with open(config, 'r') as f:
            cfg = yaml.full_load(f)

        # group jobs
        files = cfg["files"]
        if nfiles > 0:
            files = files[:nfiles]
        if mode in ["multiprocessing"] or njobs < 0:
            njobs = len(files)
        grouped_files = [list(x) for x in np.array_split(files, njobs)]
        tasks = [{
            "task": df_process,
            "args": (fs, cfg["query"]),
            "kwargs": {"chunksize": chunksize},
        } for fs in grouped_files]
        all_tasks.extend(tasks)
        if len(sizes) == 0:
            sizes.append(len(tasks))
        else:
            sizes.append(len(tasks) + sizes[-1])

    all_results = submit_tasks(all_tasks, mode=mode, ncores=ncores, batch_opts=batch_opts)

    merge_tasks, merge_sizes = [], []
    for start, stop in zip([0] + sizes[:-1], sizes):
        results = all_results[start:stop]
        if mode == 'multiprocessing':
            df = functools.reduce(lambda x, y: df_merge(x, y), results)
        else:
            # grouped multi-merge
            merge_njobs = merge_opts.get("ncores", 100)
            grouped_merges = [list(x) for x in np.array_split(results, merge_njobs)]
            tasks = [{
                "task": df_open_merge,
                "args": (r,),
                "kwargs": {},
            } for r in grouped_merges]
            merge_tasks.extend(tasks)
            if len(merge_sizes) == 0:
                merge_sizes.append(len(tasks))
            else:
                merge_sizes.append(len(tasks) + merge_sizes[-1])

    all_merge_results = submit_tasks(merge_tasks, **merge_opts)

    ret_val = []
    for output, start, stop in zip(outputs, [0] + merge_sizes[:-1], merge_sizes):
        merge_results = all_merge_results[start:stop]
        df = df_open_merge(merge_results)
        if output is not None:
            path, table = output.split(":")
            df.to_hdf(
                path, table, format='table', append=False,
                complevel=9, complib='zlib',
            )
        else:
            ret_val.append(df)
    return ret_val
def open_yaml(path):
    with open(path, 'r') as f:
        return yaml.full_load(f)
def submit_draw_data_mc(
    infile, drawer, cfg, outdir, nplots=-1, mode="multiprocessing", ncores=0,
    batch_opts="-q hep.q",
):
    with open(cfg, 'r') as f:
        cfg = yaml.full_load(f)

    # Read in dataframes
    df_data = pd.read_hdf(infile, "DataAggEvents")
    df_data = df_data.loc[("central",), :]
    df_mc = pd.read_hdf(infile, "MCAggEvents")
    df_mc = df_mc.loc[("central",), :]

    # dfs
    dfs = []
    if df_data is not None:
        dfs.append(df_data)
    if df_mc is not None:
        dfs.append(df_mc)

    # varnames
    varnames = pd.concat(dfs).index.get_level_values("varname0").unique()

    # datasets
    if df_data is not None:
        datasets = df_data.index.get_level_values("parent").unique()
    else:
        datasets = ["None"]

    # cutflows
    cutflows = pd.concat(dfs).index.get_level_values("selection").unique()

    # group into histograms
    jobs = []
    for varname in varnames:
        for dataset in datasets:
            for cutflow in cutflows:
                if varname not in cfg:
                    continue
                job_cfg = copy.deepcopy(cfg[varname])
                job_cfg.update(cfg.get("defaults", {}))
                job_cfg.update(cfg.get(dataset + "_dataset", {}))
                job_cfg.update(cfg.get(cutflow, {}))
                job_cfg.update(cfg.get(dataset + "_dataset", {}).get(cutflow, {}))
                job_cfg.update(
                    cfg.get(dataset + "_dataset", {}).get(cutflow, {}).get(varname, {}))
                toutdir = os.path.join(outdir, dataset, cutflow)
                if not os.path.exists(toutdir):
                    os.makedirs(toutdir)
                job_cfg["outpath"] = os.path.abspath(
                    os.path.join(toutdir, cfg[varname]["outpath"]))

                # data selection
                if df_data is None or (varname, cutflow, dataset) not in df_data.index:
                    df_data_loc = None
                else:
                    df_data_loc = df_data.loc[(varname, cutflow, dataset), :]

                # mc selection
                if df_mc is None or (varname, cutflow) not in df_mc.index:
                    df_mc_loc = None
                else:
                    df_mc_loc = df_mc.loc[(varname, cutflow), :]

                jobs.append((df_data_loc, df_mc_loc, copy.deepcopy(job_cfg)))

    if nplots >= 0 and nplots < len(jobs):
        jobs = jobs[:nplots]

    parallel_draw(drawer, jobs, mode, ncores, batch_opts)
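# The chained job_cfg.update(...) calls above layer increasingly specific
# settings (per-variable base, global defaults, per-dataset, per-selection),
# with later updates overriding earlier ones. A small standalone sketch of the
# same layering idea; the keys and values here are invented for illustration:
import copy

cfg = {
    "met": {"bins": 50, "outpath": "met.pdf"},
    "defaults": {"log": False},
    "ZJets_dataset": {"log": True},
}

job_cfg = copy.deepcopy(cfg["met"])           # per-variable base
job_cfg.update(cfg.get("defaults", {}))       # global defaults
job_cfg.update(cfg.get("ZJets_dataset", {}))  # dataset-specific override wins
print(job_cfg)  # {'bins': 50, 'outpath': 'met.pdf', 'log': True}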
def yaml_read(fp):
    ''' read in yaml file and return dct '''
    f = read_file(fp)
    return yaml.full_load(f)