Python get_datasets Examples, nittygriddy.utils.get_datasets Python Examples

Example #1

0

Show file

File: datasets.py Project: cbourjau/nittygriddy

def datasets(args):
    """Handle the ``datasets`` sub-command selected by the flags on *args*.

    Exactly one action is performed, checked in this order:

    * ``args.list``     -- pretty-print all datasets.
    * ``args.show``     -- pretty-print the dataset with that name.
    * ``args.search``   -- delegate to ``search_datasets_for_string``.
    * ``args.download`` -- a ``(dataset_name, volume)`` pair; validate it
      (and ``args.run_list`` if given) and call ``utils.download_dataset``.

    If none of the flags is truthy nothing happens.

    :raises ValueError: if the dataset is unknown, the volume is not a
        number, or the given run list is not a subset of the dataset's
        run list.
    """
    if args.list:
        _pprint_json(utils.get_datasets())
        return

    if args.show:
        entry = utils.get_datasets().get(args.show, None)
        if not entry:
            raise ValueError("Dataset not found.")
        _pprint_json(entry)
        return

    if args.search:
        search_datasets_for_string(args.search)
        return

    if not args.download:
        return

    ds_name, raw_volume = args.download
    try:
        volume = float(raw_volume)
    except ValueError:
        raise ValueError("Volume has to be a valid number, not `{}`".format(raw_volume))

    if not utils.get_datasets().get(ds_name, False):
        raise ValueError("Dataset `{}` not found.".format(ds_name))

    if args.run_list:
        # is the given run list a subset of the full run list?
        requested = [int(token.strip()) for token in args.run_list.split(",")]
        period_runs_csv = utils.get_datasets().get(ds_name)['run_list']
        available = [int(token.strip()) for token in period_runs_csv.split(",")]
        if set(requested).difference(available):
            raise ValueError("Run {} list is not a subset of this periods run list ({})!"
                             .format(requested, available))

    utils.download_dataset(ds_name, volume, args.run_list)

Example #2

0

Show file

File: datasets.py Project: rqwa/nittygriddy

def datasets(args):
    """Execute the dataset action requested on the command line.

    The flags on *args* are tested in the order ``list``, ``show``,
    ``search``, ``download``; the first truthy one wins.  ``download`` is a
    ``(dataset_name, volume)`` pair and may be combined with ``run_list``,
    a comma separated string of run numbers.

    :raises ValueError: for an unknown dataset, a non-numeric volume, or a
        run list that is not contained in the dataset's own run list.
    """
    def _as_run_numbers(csv_runs):
        # "1, 2,3" -> [1, 2, 3]
        return [int(token.strip()) for token in csv_runs.split(",")]

    if args.list:
        utils.pprint_json(utils.get_datasets())
    elif args.show:
        selected = utils.get_datasets().get(args.show, None)
        if selected:
            utils.pprint_json(selected)
        else:
            raise ValueError("Dataset not found.")
    elif args.search:
        search_datasets_for_string(args.search)
    elif args.download:
        ds_name, volume = args.download
        try:
            volume = float(volume)
        except ValueError:
            message = "Volume has to be a valid number, not `{}`".format(volume)
            raise ValueError(message)

        known = utils.get_datasets().get(ds_name, False)
        if not known:
            raise ValueError("Dataset `{}` not found.".format(ds_name))

        if args.run_list:
            # is the given run list a subset of the full run list?
            user_runs = _as_run_numbers(args.run_list)
            period_runs = _as_run_numbers(
                utils.get_datasets().get(ds_name)['run_list'])
            if not set(user_runs) <= set(period_runs):
                message = (
                    "Run {} list is not a subset of this periods run list ({})!"
                    .format(user_runs, period_runs))
                raise ValueError(message)

        utils.download_dataset(ds_name, volume, args.run_list)

Example #3

0

Show file

def run(args):
    """Start the ROOT analysis for this project and echo its output.

    Calls ``utils.is_valid_project_dir()``, obtains the output directory
    from ``_prepare_output_dir(args)`` and changes into it.  For every run
    mode other than ``"grid"`` the local files of ``args.dataset`` are
    globbed below ``settings["local_data_dir"]``, filtered by run number and
    appended to ``input_files.dat``.  ``root`` is then started on ``run.C``
    and its combined stdout/stderr is printed line by line.

    :param args: namespace providing ``runmode``, ``dataset`` and
        ``run_list`` (comma separated run numbers; a falsy value selects the
        dataset's own ``run_list``).
    :raises ValueError: if no local file matches the search pattern, or
        none of the found files belongs to a requested run.
    :raises KeyboardInterrupt: re-raised after the child process has been
        sent a terminate signal.
    """
    utils.is_valid_project_dir()
    output_dir = _prepare_output_dir(args)
    # start the analysis
    os.chdir(output_dir)
    if args.runmode != "grid":
        # generate input file
        ds = utils.get_datasets()[args.dataset]
        # create list of local files
        with open(os.path.join(output_dir, "input_files.dat"),
                  "a") as input_files:
            search_string = os.path.join(settings["local_data_dir"],
                                         ds["datadir"].strip("/"), "*",
                                         ds["data_pattern"].strip("/"))
            search_string = os.path.expanduser(search_string)
            search_results = glob(search_string)
            if not search_results:
                raise ValueError(
                    "No local files found at {} matching run list".format(
                        search_string))
            # Filter the found files to match the given run list
            raw_run_list = args.run_list if args.run_list else ds["run_list"]
            # Blank tokens (e.g. from a trailing comma) never select a file.
            run_list = [r.strip() for r in raw_run_list.split(",")
                        if r.strip()]
            filtered_results = [path for path in search_results
                                if any(r in path for r in run_list)]
            if not filtered_results:
                # Collect the run directories that do exist locally so the
                # error message can suggest usable run numbers.
                run_numbers = []
                for i in search_results:
                    i = i.split(ds["datadir"])[1].split("/")[0]
                    if i not in run_numbers:
                        run_numbers.append(i)
                raise ValueError(
                    "No local files found at {} matching run list good for required analysis. Try run numbers in {}"
                    .format(search_string, ", ".join(run_numbers)))
            input_files.write("\n".join(filtered_results) + "\n")
        # command to start the analysis
        cmd = ["root", "-l", "-q", "run.C"]
    else:
        cmd = ["root", "-l", "-q", "-b", "-x", 'run.C("full")']
    procs = []
    try:
        p = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        procs.append(p)
        for line in iter(p.stdout.readline, b""):
            # The pipe yields bytes; decode so that text rather than a
            # ``b'...'`` repr is printed.  Undecodable bytes are replaced
            # instead of aborting the run.
            if isinstance(line, bytes):
                line = line.decode("utf-8", "replace")
            # rstrip to remove \n; doesn't like carriage returns
            print(line.rstrip())
        # Output is exhausted: release the pipe and reap the child so it
        # does not linger as a zombie process.
        p.stdout.close()
        p.wait()
    except KeyboardInterrupt:
        for proc in procs:
            print("Killing: ", proc)
            proc.terminate()
        raise

Example #4

0

Show file

File: run.py Project: akubera/nittygriddy

def run(args):
    """Launch ``root`` on ``run.C`` inside the output directory and echo its output.

    Calls ``utils.is_valid_project_dir()``, changes into the directory
    returned by ``_prepare_output_dir(args)`` and, unless
    ``args.runmode == "grid"``, appends the local files of ``args.dataset``
    that belong to the requested runs to ``input_files.dat``.  The combined
    stdout/stderr of the ``root`` child process is printed line by line.

    :param args: namespace providing ``runmode``, ``dataset`` and
        ``run_list`` (comma separated run numbers; a falsy value selects the
        dataset's own ``run_list``).
    :raises ValueError: if no globbed local file belongs to a requested run.
    :raises KeyboardInterrupt: re-raised after the child process has been
        sent a terminate signal.
    """
    utils.is_valid_project_dir()
    output_dir = _prepare_output_dir(args)
    # start the analysis
    os.chdir(output_dir)
    if args.runmode != "grid":
        # generate input file
        ds = utils.get_datasets()[args.dataset]
        # create list of local files
        with open(os.path.join(output_dir, "input_files.dat"),
                  'a') as input_files:
            search_string = os.path.join(settings["local_data_dir"],
                                         ds["datadir"].lstrip("/"), "*",
                                         ds["data_pattern"])
            search_string = os.path.expanduser(search_string)
            search_results = glob(search_string)
            # Filter the found files to match the given run list
            raw_run_list = args.run_list if args.run_list else ds["run_list"]
            # Blank tokens (e.g. from a trailing comma) never select a file.
            run_list = [r.strip() for r in raw_run_list.split(",")
                        if r.strip()]
            filtered_results = [path for path in search_results
                                if any(r in path for r in run_list)]
            if not filtered_results:
                raise ValueError(
                    "No local files found at {} matching run list".format(
                        search_string))
            input_files.write('\n'.join(filtered_results) + '\n')
        # command to start the analysis
        cmd = ['root', '-l', '-q', 'run.C']
    else:
        cmd = ['root', '-l', '-q', '-b', '-x', 'run.C(\"full\")']
    procs = []
    try:
        p = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        procs.append(p)
        for line in iter(p.stdout.readline, b''):
            if isinstance(line, bytes):
                # Replace undecodable bytes instead of letting a stray
                # non-UTF-8 character in ROOT's output abort the run.
                line = line.decode("utf-8", "replace")
            # rstrip to remove \n; doesn't like carriage returns
            print(line.rstrip())
        # Output is exhausted: release the pipe and reap the child so it
        # does not linger as a zombie process.
        p.stdout.close()
        p.wait()
    except KeyboardInterrupt:
        for proc in procs:
            print("Killing: ", proc)
            proc.terminate()
        raise

Example #5

0

Show file

File: datasets.py Project: cbourjau/nittygriddy

def search_datasets_for_string(s):
    """Pretty-print every dataset that has a field value containing *s*.

    The datasets returned by ``utils.get_datasets()`` are searched field by
    field, descending into nested dictionaries.  Each matching dataset is
    reported once as ``{name: dataset}``, even if several of its fields
    match.

    :param s: string to look for inside the field values.
    """
    def flatten(dictionary):
        """Yield the ``(key, value)`` leaf pairs of a possibly nested dict."""
        for key, value in dictionary.items():
            if isinstance(value, dict):
                # recurse
                for res in flatten(value):
                    yield res
            else:
                yield key, value

    def contains(value):
        """Return whether *s* is in *value*; unsearchable values never match."""
        try:
            return s in value
        except TypeError:
            # e.g. numbers or None do not support the ``in`` operator
            return False

    datasets = utils.get_datasets()
    # ``any`` stops at the first matching field, so a dataset is listed once.
    matches = [{dset_name: dset}
               for dset_name, dset in datasets.items()
               if any(contains(value) for _, value in flatten(dset))]
    _pprint_json(matches)

Example #6

0

Show file

File: datasets.py Project: rqwa/nittygriddy

def search_datasets_for_string(s):
    """Pretty-print every dataset that has a field value containing *s*.

    All datasets from ``utils.get_datasets()`` are scanned, including the
    values of nested dictionaries.  A dataset whose fields match is added to
    the result exactly once as ``{name: dataset}``.

    :param s: string to look for inside the field values.
    """
    def flatten(dictionary):
        """Yield the ``(key, value)`` leaf pairs of a possibly nested dict."""
        for key, value in dictionary.items():
            if isinstance(value, dict):
                # recurse
                for res in flatten(value):
                    yield res
            else:
                yield key, value

    datasets = utils.get_datasets()
    matches = []
    for dset_name, dset in datasets.items():
        # ``flatten`` yields (key, value) pairs; unpack them so that the
        # test below searches the value itself rather than doing an
        # exact-membership test on the pair.
        for _key, value in flatten(dset):
            try:
                found = s in value
            except TypeError:
                # values such as numbers or None cannot be searched
                found = False
            if found:
                matches.append({dset_name: dset})
                # one hit is enough; do not list the dataset repeatedly
                break
    utils.pprint_json(matches)

Example #7

0

Show file

File: run.py Project: akubera/nittygriddy

def _append_local_input_files(args, output_dir):
    """Append the local files of the requested runs to ``input_files.dat``."""
    ds = utils.get_datasets()[args.dataset]
    # create list of local files
    with open(os.path.join(output_dir, "input_files.dat"), 'a') as input_files:
        search_string = os.path.join(settings["local_data_dir"],
                                     ds["datadir"].lstrip("/"),
                                     "*",
                                     ds["data_pattern"])
        search_string = os.path.expanduser(search_string)
        search_results = glob(search_string)
        # Filter the found files to match the given run list
        raw_run_list = args.run_list if args.run_list else ds["run_list"]
        # Blank tokens (e.g. from a trailing comma) never select a file.
        run_list = [r.strip() for r in raw_run_list.split(",") if r.strip()]
        filtered_results = [path for path in search_results
                            if any(r in path for r in run_list)]
        if not filtered_results:
            raise ValueError("No local files found at {} matching run list".format(search_string))
        input_files.write('\n'.join(filtered_results) + '\n')


def _echo_process_output(cmd):
    """Run *cmd*, print its combined stdout/stderr line by line and reap it."""
    procs = []
    try:
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        procs.append(p)
        for line in iter(p.stdout.readline, b''):
            if isinstance(line, bytes):
                # Replace undecodable bytes instead of letting a stray
                # non-UTF-8 character in the output abort the run.
                line = line.decode("utf-8", "replace")
            print(line.rstrip())  # rstrip to remove \n; doesn't like carriage returns
        # Output is exhausted: release the pipe and reap the child so it
        # does not linger as a zombie process.
        p.stdout.close()
        p.wait()
    except KeyboardInterrupt:
        for proc in procs:
            print("Killing: ", proc)
            proc.terminate()
        raise


def run(args):
    """Launch ``root`` on ``run.C`` inside the output directory and echo its output.

    Calls ``utils.is_valid_project_dir()``, changes into the directory
    returned by ``_prepare_output_dir(args)`` and, unless
    ``args.runmode == "grid"``, first appends the matching local files of
    ``args.dataset`` to ``input_files.dat``.

    :param args: namespace providing ``runmode``, ``dataset`` and
        ``run_list`` (comma separated run numbers; a falsy value selects the
        dataset's own ``run_list``).
    :raises ValueError: if no globbed local file belongs to a requested run.
    :raises KeyboardInterrupt: re-raised after the child process has been
        sent a terminate signal.
    """
    utils.is_valid_project_dir()
    output_dir = _prepare_output_dir(args)
    # start the analysis
    os.chdir(output_dir)
    if args.runmode != "grid":
        # generate input file
        _append_local_input_files(args, output_dir)
        # command to start the analysis
        cmd = ['root', '-l', '-q', 'run.C']
    else:
        cmd = ['root', '-l', '-q', '-b', '-x', 'run.C(\"full\")']
    _echo_process_output(cmd)