Example #1
def main():
    options = parse_args()
    mode = options.mode
    njobs = options.ncores

    # setup jobs
    with open(options.config, 'r') as f:
        cfg = yaml.full_load(f)

    # group jobs
    files = cfg["files"]
    if options.nfiles > 0:
        files = files[:options.nfiles]
    if mode in ["multiprocessing"] or njobs < 0:
        njobs = len(files)

    grouped_files = [list(x) for x in np.array_split(files, njobs)]
    tasks = [
        {"task": df_skim, "args": (fs,cfg,options.output.format(idx)), "kwargs": {}}
        for idx, fs in enumerate(grouped_files)
    ]

    if mode=="multiprocessing" and options.ncores==0:
        results = pysge.local_submit(tasks)
    elif mode=="multiprocessing":
        results = pysge.mp_submit(tasks, ncores=options.ncores)
    elif mode=="sge":
        results = pysge.sge_submit(
            "zdb", "_ccsp_temp/", tasks=tasks, options=options.sge_opts,
            sleep=5, request_resubmission_options=True,
        )
    print("Finished!")
Example #2
def connections():
    if request.method == 'POST':
        # Connection page wants something
        act = request.form['action']
        if act == 'add':
            # First page of adding Connection
            return render_template('pages/connections-add.html', action=act)
        if act == 'add2':
            # Second page of adding Connection
            mark = request.form['market']
            if mark == 'crypto':
                ex = ccxt.exchanges
                return render_template('pages/connections-add.html',
                                       action=act,
                                       market=mark,
                                       exch=ex,
                                       len=len(ex))
            if mark == 'forex':
                return render_template('pages/connections-add.html',
                                       action=act,
                                       market=mark)
        if act == 'fin':
            # Setup of exchange has finished create the connection
            ex = request.form['exchSel']
            market = request.form['market']
            if market == 'crypto':
                do.createCryptoCon(ex)
            return redirect("/connections")
        if act == 'info':
            # Create temp exchange instance based on post data
            ex = request.form['ex']
            return do.createCryptoInfo(ex)
        if act == 'fullinfo':
            con = request.form['con']
            # Create pathname and load connection config
            cfname = confPath + 'conn' + os.path.sep + con + '.yml'
            with open(cfname, 'r') as file:
                cfdata = yaml.full_load(file)
            # Create table in html
            cftable = "<table>"
            for key in cfdata:
                cftable = cftable + "<tr><th>" + str(key) + "</th><td>" + str(
                    cfdata[key]) + "</td></tr>"
            cftable = cftable + "</table>"
            return cftable
        if act == 'delete':
            # Delete connection
            flash('Connection Deleted!', 'important')
            # Delete file
            delfile = confPath + 'conn' + os.path.sep + request.form['con'] + '.yml'
            os.remove(delfile)
            return redirect("/connections")

    else:
        connections = do.allCfgs('conn')
        return render_template('pages/connections.html',
                               connections=connections)
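In the 'fullinfo' branch above, the same HTML table can be assembled with a join over formatted rows, which avoids repeated string reallocation; a behavior-equivalent sketch:

# Equivalent table construction for the 'fullinfo' branch.
rows = "".join(
    "<tr><th>{}</th><td>{}</td></tr>".format(key, value)
    for key, value in cfdata.items()
)
cftable = "<table>" + rows + "</table>"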
Example #3
    def run(self):
        self.logger.info("Reading input file: {}".format(self.input_file))
        with open(self.input_file, "r") as inputfile:
            data = yaml.full_load(inputfile.read())
        reads = {}
        for sample, units in data['samples'].items():
            reads[sample] = {'R1': [],
                             'R2': []}
            for unit in units:
                for f in data['units'][unit]:
                    reads[sample][self.get_read_pair(f)].append(f)

        new_samples = {}
        if self.config:
            self.logger.info("Skipping merge: --config_only mode activated")
        for s, pairs in reads.items():
            cmd = ['cat']
            for p in pairs['R1']:
                cmd.append(p)
            cmd.append('>' + os.path.join(self.folder, '{}_R1.fastq.gz'.format(s)))
            if not self.config:
                self.logger.info("Running merge command: {}".format(cmd))
                subprocess.run(' '.join(cmd), shell=True)
            cmd = ['cat']
            for p in pairs['R2']:
                cmd.append(p)
            cmd.append('>' + os.path.join(self.folder, '{}_R2.fastq.gz'.format(s)))
            if not self.config:
                if self.paired:
                    self.logger.info("Paired Reads mode activated: merging R2 reads.")
                    self.logger.info("Running R2 merge command: {}".format(cmd))
                    subprocess.run(' '.join(cmd), shell=True)

            workdir = os.getcwd()
            new_samples[s] = os.path.join(workdir, self.folder, '{}_R1.fastq.gz'.format(s))
        yaml_template = 'config.template.yaml'
        with open(yaml_template, "r") as inputfile:
            new_data = yaml.full_load(inputfile.read())
        new_data['samples'] = new_samples
        yaml_project = 'config.project.{}.yaml'.format(self.project)
        if not self.merge:
            self.logger.info("Writing configfile: {}".format(yaml_project))
            with open(yaml_project, "w") as outfile:
                yaml.dump(new_data, outfile, indent=4)
        if self.merge:
            self.logger.info("Skipping configfile generation: --merge_only mode activated")
Example #4
    def createANN(self, nugget, nom, testsplit, scaler, scarcity,
                  inputlayerunits, hiddenlayers, hiddenlayerunits, optimizer,
                  loss, metrics, batchsize, epoch):
        # Create ANN YAML
        id = nom.lower()
        annYML = 'id: ' + id + "\n"
        annYML += 'name: ' + nom + "\n"
        annYML += 'nugget: ' + nugget + "\n"
        annYML += 'training: True' + "\n"
        annYML += 'scaler: ' + scaler + "\n"
        # print(scarcity, file=sys.stderr)
        if scarcity == "on":
            annYML += 'scarcity: True' + "\n"
        else:
            annYML += 'scarcity: False' + "\n"
        annYML += 'testsplit: ' + testsplit + "\n"
        # ANN Layers
        annYML += 'inputlayerunits: ' + inputlayerunits + "\n"
        annYML += 'hiddenlayers: ' + hiddenlayers + "\n"
        annYML += 'hiddenlayerunits: ' + hiddenlayerunits + "\n"
        # Fitting
        annYML += 'optimizer: ' + optimizer + "\n"
        annYML += 'loss: ' + loss + "\n"
        annYML += 'metrics: ' + metrics + "\n"
        annYML += 'batchsize: ' + batchsize + "\n"
        annYML += 'epoch: ' + epoch + "\n"
        # Training
        annYML += 'lasttrain: 0' + "\n"
        annYML += 'trainaccuracy: 0' + "\n"
        annYML += 'testaccuracy: 0' + "\n"
        # Add Nugget Info
        nfile = self.nuggetDataPath + nugget + '.pkl'
        # df = pd.read_feather(nfile)
        info = self.nugInfo(nfile)
        # Add info from nuggetinfo and enrichments
        annYML += 'symb: ' + info['symb'] + "\n"
        annYML += 'timeframe: ' + info['timeframe'] + "\n"
        annYML += 'from: ' + str(info['from']) + "\n"
        annYML += 'to: ' + str(info['to']) + "\n"
        annYML += 'depen: ' + info['depen'] + "\n"
        # indis = list(df.columns[0].values.tolist())
        with open(self.enConfPath + info['indi'] + '.yml', 'r') as afile:
            indi = yaml.full_load(afile)
        # print(indi['riches'], file=sys.stderr)
        annYML += 'indi: ' + indi['riches'] + "\n"
        # Delete empty lines
        annYML = os.linesep.join([s for s in annYML.splitlines() if s])
        # Save to YAML file
        self.writeRawCfgFile('aiann', id, annYML)
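Building YAML by string concatenation works here because every value is a scalar, but serializing a dict sidesteps quoting and type pitfalls. A sketch of the same header fields via yaml.dump, assuming the same inputs:

# Alternative sketch: assemble a dict and let yaml.dump handle formatting.
cfg = {
    'id': nom.lower(),
    'name': nom,
    'nugget': nugget,
    'training': True,
    'scarcity': scarcity == "on",
    'testsplit': testsplit,
}
annYML = yaml.dump(cfg, default_flow_style=False)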
Example #5
def read_menu_from_github(conf, save=False):
    """Read master.yaml from GitHub"""
    token = os.getenv("GITHUB_TOKEN")
    g = github.Github(token)
    repo = g.get_repo(conf.git_remote)
    try:
        master = repo.get_contents("master.yaml")
    except github.GithubException:
        # Retry once on a transient API error
        master = repo.get_contents("master.yaml")

    master_dict = yaml.full_load(master.decoded_content)

    if save:
        save_yaml(master_dict, conf.reference_dir)
    else:
        return master_dict
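master.decoded_content is a byte string, which PyYAML accepts directly, so no explicit decode step is needed. A usage sketch, assuming conf.git_remote holds an "owner/repo" slug:

# Hypothetical usage of read_menu_from_github.
menu = read_menu_from_github(conf)      # returns the parsed dict
read_menu_from_github(conf, save=True)  # writes it out via save_yaml instead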
Example #6
def multi_skim(
    configs,
    mode='multiprocessing',
    ncores=0,
    nfiles=-1,
    batch_opts="",
    outputs=None,
    chunksize=250000,
):
    all_tasks = []

    for config, output in zip(configs, outputs):
        outdir = os.path.dirname(output)
        if not os.path.exists(outdir):
            os.makedirs(outdir)

        njobs = ncores

        # setup jobs
        with open(config, 'r') as f:
            cfg = yaml.full_load(f)

        # group jobs
        files = cfg["files"]
        if nfiles > 0:
            files = files[:nfiles]
        if mode in ["multiprocessing"] or njobs < 0:
            njobs = len(files)

        grouped_files = [list(x) for x in np.array_split(files, njobs)]

        tasks = [{
            "task": job,
            "args": (fs, copy.deepcopy(cfg), output.format(idx)),
            "kwargs": {
                "chunksize": chunksize
            },
        } for idx, fs in enumerate(grouped_files)]
        all_tasks.extend(tasks)

    submit_tasks(all_tasks, mode, ncores, batch_opts)
    print("Finished!")
Example #7
    def readCfgFile(self, oftype, nom):
        fname = self.confPath + oftype + os.path.sep + nom
        with open(fname, 'r') as file:
            output = yaml.full_load(file)
        return output
Example #8
def read_yaml(file_path):
    """Simple function to read yaml file"""
    with open(file_path) as yml:
        dict_ = yaml.full_load(yml)
    return dict_
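full_load resolves all standard YAML tags; for input that is not fully trusted, safe_load is the more conservative loader and is a one-line swap. A sketch of the safe variant:

# Safe-loading variant with the same return shape.
def read_yaml_safe(file_path):
    """Read a YAML file using the restricted safe loader."""
    with open(file_path) as yml:
        return yaml.safe_load(yml)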
Example #9
modes_module_path = os.path.join(exptool_package_path, "modes.py")
modes_module_spec = importlib.util.spec_from_file_location(
    "acconeer.exptool.modes", modes_module_path)
modes_module = importlib.util.module_from_spec(modes_module_spec)
modes_module_spec.loader.exec_module(modes_module)

parser = argparse.ArgumentParser()
parser.add_argument("input_filename")
args = parser.parse_args()
in_fn = args.input_filename

out_fn = os.path.join(exptool_package_path, "data", "regmap.yaml")
assert os.path.exists(os.path.dirname(out_fn))

with open(in_fn, "r") as in_f:
    d = yaml.full_load(in_f)


def clean(d):
    to_pop = []

    for k, v in d.items():
        if k == "description":
            to_pop.append(k)
            continue

        if isinstance(v, dict):
            if v.get("internal", False):
                to_pop.append(k)
                continue

            # Recurse into nested mappings
            clean(v)

    for k in to_pop:
        d.pop(k)
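With clean applied, the write step implied by out_fn is the remaining piece; a sketch under that assumption:

# Hedged sketch of the write step implied by out_fn above.
clean(d)
with open(out_fn, "w") as out_f:
    yaml.dump(d, out_f, default_flow_style=False)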
Example #10
    2]  #"https://raw.githubusercontent.com/BBerastegui/fresh-dns-servers/master/resolvers.txt"

print('downloading data...')
r = requests.get(URL)

print('processing data...')
response_text = r.text
new_ips = response_text.splitlines()

print('read yaml file...')
with open(INPUT_FILE_NAME) as file:
    documents = yaml.full_load(file)
    documents['resolvers'] = new_ips

print('write yaml file with processed data...')
with open(INPUT_FILE_NAME, 'w') as file:
    yaml.dump(documents, file, indent=4)

old_string = '- '
new_string = '    - '

# Safely read the input filename using 'with'
with open(INPUT_FILE_NAME) as f:
    s = f.read()

# Safely write the changed content back, indenting list items
with open(INPUT_FILE_NAME, 'w') as f:
    f.write(s.replace(old_string, new_string))
Example #11
log.info(
    "Reading IP and port of {} from env vars".format(cc_db_var_name_vimemu))
env_vars = os.environ.keys()
ip_vars = [
    var for var in env_vars
    if cc_db_var_name_k8s in var and cc_db_host_k8s in var
]
cc_db_ip = os.getenv(ip_vars[0]) if ip_vars else None
if not cc_db_ip:
    cc_db_ip = os.getenv(cc_db_var_name_vimemu + "_ip", "localhost")
port_vars = [
    var for var in env_vars
    if cc_db_var_name_k8s in var and cc_db_port_k8s in var
]
cc_db_port = os.getenv(port_vars[0]) if port_vars else None
if not cc_db_port:
    cc_db_port = os.getenv(cc_db_var_name_vimemu + "_port", "9090")
cc_db_url = "http://{}:{}".format(cc_db_ip, cc_db_port)

# use that to replace the URL in the configuration
datasource_path = "/etc/grafana/provisioning/datasources/datasource.yml"
log.info("Updating configuration in {} accordingly".format(datasource_path))
with open(datasource_path, "r") as f_r:
    ds = yaml.full_load(f_r)
ds['datasources'][0]['url'] = cc_db_url
log.debug("Replacing datasource URL")
with open(datasource_path, "w") as f_w:
    f_w.write(yaml.safe_dump(ds))

log.info("Done updating Grafana configurations")
Example #12
def analyse(
    config,
    mode="multiprocessing",
    ncores=0,
    nfiles=-1,
    batch_opts="",
    output=None,
    chunksize=500000,
    merge_opts=None,
):
    merge_opts = merge_opts or {}
    if output is not None and len(output.split(":")) != 2:
        raise ValueError(
            "The output kwarg should be None or a string with the format "
            "'{file_name}:{table_name}' instead of " + "{}".format(output))

    njobs = ncores

    # setup jobs
    with open(config, 'r') as f:
        cfg = yaml.full_load(f)

    # group jobs
    files = cfg["files"]
    if nfiles > 0:
        files = files[:nfiles]
    if mode in ["multiprocessing"] or njobs < 0:
        njobs = len(files)

    grouped_files = [list(x) for x in np.array_split(files, njobs)]
    tasks = [{
        "task": df_process,
        "args": (fs, cfg["query"]),
        "kwargs": {
            "chunksize": chunksize
        },
    } for fs in grouped_files]
    results = submit_tasks(tasks,
                           mode=mode,
                           ncores=ncores,
                           batch_opts=batch_opts)
    if mode == 'multiprocessing':
        df = functools.reduce(lambda x, y: df_merge(x, y), results)
    else:
        # grouped multi-merge
        merge_njobs = merge_opts.get("ncores", 100)
        grouped_merges = [
            list(x) for x in np.array_split(results, merge_njobs)
        ]
        tasks = [{
            "task": df_open_merge,
            "args": (r, ),
            "kwargs": {},
        } for r in grouped_merges]
        merge_mode = merge_opts.get("mode", "multiprocessing")
        if merge_mode == "multiprocessing" and ncores == 0:
            semimerged_results = pysge.local_submit(tasks)
            df = functools.reduce(lambda x, y: df_merge(x, y), semimerged_results)
        elif merge_mode == "multiprocessing":
            semimerged_results = pysge.mp_submit(tasks, ncores=ncores)
            df = functools.reduce(lambda x, y: df_merge(x, y), semimerged_results)
        elif merge_mode == "sge":
            semimerged_results = pysge.sge_submit(
                tasks,
                "zdb-merge",
                "_ccsp_temp",
                options=merge_opts.get("batch_opts", "-q hep.q"),
                sleep=5,
                request_resubmission_options=True,
                return_files=True,
            )
            df = df_open_merge(semimerged_results)

    if output is not None:
        path, table = output.split(":")
        df.to_hdf(
            path,
            table,
            format='table',
            append=False,
            complevel=9,
            complib='zlib',
        )
    else:
        return df
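Call-shape note: with output set, the merged frame is written to HDF5 and nothing is returned; with output left as None, the frame comes back to the caller. Hypothetical invocations:

# Hypothetical invocations; config path and table name are illustrative.
analyse("config.yaml", mode="multiprocessing", ncores=4,
        output="results.h5:events")
df = analyse("config.yaml", mode="multiprocessing", ncores=4)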
Example #13
def multi_analyse(
    configs,
    mode="multiprocessing",
    ncores=0,
    nfiles=-1,
    batch_opts="",
    outputs=None,
    chunksize=500000,
    merge_opts=None,
):
    merge_opts = merge_opts or {}
    for output in outputs:
        if output is not None and len(output.split(":")) != 2:
            raise ValueError(
                "The output kwarg should be None or a string with the format "
                "'{file_name}:{table_name}' instead of " + "{}".format(output))

    all_tasks, sizes = [], []
    for config in configs:
        njobs = ncores

        # setup jobs
        with open(config, 'r') as f:
            cfg = yaml.full_load(f)

        # group jobs
        files = cfg["files"]
        if nfiles > 0:
            files = files[:nfiles]
        if mode in ["multiprocessing"] or njobs < 0:
            njobs = len(files)

        grouped_files = [list(x) for x in np.array_split(files, njobs)]
        tasks = [{
            "task": df_process,
            "args": (fs, cfg["query"]),
            "kwargs": {
                "chunksize": chunksize
            },
        } for fs in grouped_files]
        all_tasks.extend(tasks)
        if len(sizes) == 0:
            sizes.append(len(tasks))
        else:
            sizes.append(len(tasks) + sizes[-1])

    all_results = submit_tasks(all_tasks,
                               mode=mode,
                               ncores=ncores,
                               batch_opts=batch_opts)

    merge_tasks, merge_sizes = [], []
    for start, stop in zip([0] + sizes[:-1], sizes):
        results = all_results[start:stop]

        if mode == 'multiprocessing':
            df = functools.reduce(lambda x, y: df_merge(x, y), results)
        else:
            # grouped multi-merge
            merge_njobs = merge_opts.get("ncores", 100)
            grouped_merges = [
                list(x) for x in np.array_split(results, merge_njobs)
            ]
            tasks = [{
                "task": df_open_merge,
                "args": (r, ),
                "kwargs": {},
            } for r in grouped_merges]
            merge_tasks.extend(tasks)
            if len(merge_sizes) == 0:
                merge_sizes.append(len(tasks))
            else:
                merge_sizes.append(len(tasks) + merge_sizes[-1])

    all_merge_results = submit_tasks(merge_tasks, **merge_opts)

    ret_val = []
    for output, start, stop in zip(outputs, [0] + merge_sizes[:-1],
                                   merge_sizes):
        merge_results = all_merge_results[start:stop]
        df = df_open_merge(merge_results)

        if output is not None:
            path, table = output.split(":")
            df.to_hdf(
                path,
                table,
                format='table',
                append=False,
                complevel=9,
                complib='zlib',
            )
        else:
            ret_val.append(df)
    return ret_val
Example #14
def open_yaml(path):
    with open(path, 'r') as f:
        return yaml.full_load(f)
Example #15
def submit_draw_data_mc(
    infile,
    drawer,
    cfg,
    outdir,
    nplots=-1,
    mode="multiprocessing",
    ncores=0,
    batch_opts="-q hep.q",
):
    with open(cfg, 'r') as f:
        cfg = yaml.full_load(f)

    # Read in dataframes
    df_data = pd.read_hdf(infile, "DataAggEvents")
    df_data = df_data.loc[("central", ), :]
    df_mc = pd.read_hdf(infile, "MCAggEvents")
    df_mc = df_mc.loc[("central", ), :]

    # dfs
    dfs = []
    if df_data is not None:
        dfs.append(df_data)
    if df_mc is not None:
        dfs.append(df_mc)

    # varnames
    varnames = pd.concat(dfs).index.get_level_values("varname0").unique()

    # datasets
    if df_data is not None:
        datasets = df_data.index.get_level_values("parent").unique()
    else:
        datasets = ["None"]

    # cutflows
    cutflows = pd.concat(dfs).index.get_level_values("selection").unique()

    # group into histograms
    jobs = []
    for varname in varnames:
        for dataset in datasets:
            for cutflow in cutflows:
                if varname not in cfg:
                    continue
                job_cfg = copy.deepcopy(cfg[varname])
                job_cfg.update(cfg.get("defaults", {}))
                job_cfg.update(cfg.get(dataset + "_dataset", {}))
                job_cfg.update(cfg.get(cutflow, {}))
                job_cfg.update(
                    cfg.get(dataset + "_dataset", {}).get(cutflow, {}))
                job_cfg.update(
                    cfg.get(dataset + "_dataset", {}).get(cutflow,
                                                          {}).get(varname, {}))
                toutdir = os.path.join(outdir, dataset, cutflow)
                if not os.path.exists(toutdir):
                    os.makedirs(toutdir)
                job_cfg["outpath"] = os.path.abspath(
                    os.path.join(toutdir, cfg[varname]["outpath"]))

                # data selection
                if df_data is None or (varname, cutflow,
                                       dataset) not in df_data.index:
                    df_data_loc = None
                else:
                    df_data_loc = df_data.loc[(varname, cutflow, dataset), :]

                # mc selection
                if df_mc is None or (varname, cutflow) not in df_mc.index:
                    df_mc_loc = None
                else:
                    df_mc_loc = df_mc.loc[(varname, cutflow), :]

                jobs.append((df_data_loc, df_mc_loc, copy.deepcopy(job_cfg)))

    if 0 <= nplots < len(jobs):
        jobs = jobs[:nplots]
    parallel_draw(drawer, jobs, mode, ncores, batch_opts)
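The job_cfg cascade above layers dict.update calls, so each later call overrides earlier keys, ending with the most specific per-dataset, per-cutflow, per-variable block. A compact sketch of that precedence with illustrative keys:

# Later update() calls win; keys here are illustrative.
job_cfg = {"logy": False, "ylabel": "events"}   # cfg[varname]
job_cfg.update({"logy": True})                  # defaults / dataset / cutflow
job_cfg.update({"ylabel": "entries"})           # most specific block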
Example #16
def yaml_read(fp):
    ''' read in yaml file and return dict '''
    f = read_file(fp)
    return yaml.full_load(f)