Example #1
class BMP280Sensor(AbstractTemperatureSensor):
    """
     Class to read the sensor BMP280 on the I2C bus

    To find the sensor id, make sure the i2c bus is enabled, the device is connected, and 
    mp3115a2 module is loaded. Additionally, you have to tell the i2c bus to read the chip
    with the 
    
    "echo bmp280 0x76" > /sys/bus/i2c/devices/i2c-1/new_device

    ...command. The i2c-1 bus number may change if the system has more than one i2c bus loadded.

     Then look in /sys/bus/i2c/devices directory for the 1-0076 directory.

     The 0x76 above and the 1-0076 represents the i2c bus id. The bus id can be determined
     with the i2cdetect command is needed. Some bmp280 sensors have ids of 0x77.

     The bme280 should also be supported but the humidity value will not be read

     """
    def __init__(self, temperature_config):
        super().__init__(temperature_config)
        self.property_bus = "i2c"
        devicepath = PosixPath("/sys/bus/i2c/devices").joinpath(
            temperature_config.device).joinpath("iio:device0")
        self.temperature_path = PosixPath(devicepath.joinpath("in_temp_input"))
        self.pressure_path = PosixPath(
            devicepath.joinpath("in_pressure_input"))
        # Make sure they exist
        if (not self.temperature_path.exists()
                or not self.temperature_path.is_file()):
            raise DeviceError(self.temperature_path)
        if (not self.pressure_path.exists()
                or not self.pressure_path.is_file()):
            raise DeviceError(self.pressure_path)

    @property
    def temperature(self):
        with self.temperature_path.open() as f:
            data = f.readline()
            data = data.strip()
        result = int(data) / 1000
        if (self.property_sensor_config.rounding != -1):
            result = round(result, self.property_sensor_config.rounding)
        return result

    @property
    def pressure(self):
        with self.pressure_path.open() as f:
            data = f.readline()
            data = data.strip()
        result = float(data) * 10
        if (self.property_sensor_config.rounding != -1):
            result = round(result, self.property_sensor_config.rounding)
        return result

    @property
    def bus(self):
        return self.property_bus
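
A minimal usage sketch (not part of the original example): it assumes the configuration object only needs the .device and .rounding attributes used above, that a sensor was registered as 1-0076, and that AbstractTemperatureSensor stores the config as property_sensor_config; the real configuration class is not shown here.

from types import SimpleNamespace

# Hypothetical config: .device names the sysfs directory, .rounding == -1 disables rounding.
config = SimpleNamespace(device="1-0076", rounding=1)
sensor = BMP280Sensor(config)  # raises DeviceError if the sysfs entries are missing
print(sensor.temperature, sensor.pressure, sensor.bus)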
Example #2
class BH1750Sensor(AbstractLightSensor):
    """
    Class to read the sensor BH1750 on the I2C bus

    To find the sensor id, make sure the i2c bus is enabled, the device is connected, and
    the bh1750 module is loaded. Additionally, you have to tell the i2c bus to read the chip
    with the

    echo bh1750 0x23 > /sys/bus/i2c/devices/i2c-1/new_device

    ...command. The i2c-1 bus number may change if the system has more than one i2c bus.

    Then look in the /sys/bus/i2c/devices directory for the 1-0023 directory.

    The 0x23 above and the 1-0023 refer to the device's i2c address. The address can be
    determined with the i2cdetect command if needed.

    """
    def __init__(self, lightsensor_config):
        super().__init__(lightsensor_config)
        self.property_bus = "i2c"
        devicepath = PosixPath("/sys/bus/i2c/devices").joinpath(
            lightsensor_config.device).joinpath("iio:device0")
        self.lightsensor_path_raw = PosixPath(
            devicepath.joinpath("in_illuminance_raw"))
        self.lightsensor_path_scale = PosixPath(
            devicepath.joinpath("in_illuminance_scale"))
        # Make sure they exist
        if (not self.lightsensor_path_raw.exists()
                or not self.lightsensor_path_raw.is_file()):
            raise DeviceError(self.lightsensor_path_raw)
        if (not self.lightsensor_path_scale.exists()
                or not self.lightsensor_path_scale.is_file()):
            raise DeviceError(self.lightsensor_path_scale)

    def dispose(self):
        pass

    @property
    def lightlevel(self):
        with self.lightsensor_path_raw.open() as f:
            data_raw = f.readline()
        with self.lightsensor_path_scale.open() as f:
            data_scale = f.readline()
        result = int(data_raw) * float(data_scale)
        if (self.property_sensor_config.rounding != -1):
            result = round(result, self.property_sensor_config.rounding)
        return result

    @property
    def bus(self):
        return self.property_bus
Example #3
    def _load_json(self, f: PosixPath) -> dict:
        data: dict = {}

        with f.open(encoding='utf-8') as fin:
            print(f'--- processing {f.stem} ---')
            data = json.load(fin)

        if 'entity' not in data:
            data['entity'] = Entity.eid()

            with f.open(mode='w', encoding='utf-8') as fout:
                json.dump(data, fout, ensure_ascii=False, indent=2)

        return data
Example #4
def read_file_text(filename: Path):
    """return all text contents in line-based array of file or 'None' if not a text file"""
    filename = Path(filename)
    current_app.logger.info('Reading text from file: %s', filename.resolve())
    text_lines = []
    try:
        text_lines = filename.read_text().splitlines()
        current_app.logger.debug('from %s text_lines[0:1]: %s', filename.name,
                                 str(text_lines[0:1]))
        return text_lines

    except Exception as e:
        current_app.logger.error('Error reading from file %s: %s',
                                 filename.resolve(), e)
        return None
Example #5
def open_md(p: PosixPath):
    data = dict(
        title="",
        slug="",
        date="",
        tags=[],
        category="",
        content="",
        published=True,
    )
    with p.open() as f:
        lines = f.readlines()
    content_start_line, line_count = 0, 0
    pre_field = ""
    for i, line in enumerate(lines):
        if ":" in line:
            field, value = line.split(":", 1)
            if field != "tags" and field in data:
                data[field] = value.strip().strip("'").strip('"').strip("-")
            pre_field = field
        elif pre_field == "tags":
            data["tags"].append(line.strip().strip("-").strip())
        if line.startswith("---"):
            line_count += 1
        if line_count == 2:
            content_start_line = i
            break
    content = (textify("".join(lines[content_start_line:])).replace(
        "--", "").replace("more", "").replace("\n", ""))
    data["content"] = re.sub("```(.|\n)*?```", " ", content)
    data["url"] = f"/posts/{data.pop('slug')}/"
    d = datetime.fromisoformat(data["date"])
    data["date"] = int(d.timestamp())
    if data.pop("published") != "false":
        blogs.append(data)
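
For orientation, a post layout that the parser above would accept could look like the following. This is a reconstruction from the parsing logic (front matter delimited by two "---" lines, a tags list, and an ISO date), not a file from the original project.

---
title: 'Hello World'
slug: hello-world
date: '2021-01-01T00:00:00'
tags:
- python
- pathlib
category: notes
published: 'true'
---
Body text of the post goes here.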
Example #6
def get_blank_systematics(config_file):
    """Get list of NPs and categories from TRExFitter config file

    Parameters
    ----------
    config_file : str
        name of config file

    Returns
    -------
    nps : dict
        dictionary mapping nuisance parameter names to NuisPar objects
    categories : set
        all categories that were found
    """
    trex_config = PosixPath(config_file)
    nps = []
    with trex_config.open("r") as f:
        config_blocks = f.read().split("\n\n")
        for block in config_blocks:
            if "Systematic:" in block:
                if block[0] == "%":
                    continue
                block = block.replace("\n  ", "\n")
                s = yaml.load(block, Loader=yaml.FullLoader)
                np = NuisPar(name=s["Systematic"],
                             category=s["Category"],
                             title=s["Title"])
                nps.append(np)
    categories = set()
    nps = {np.name: np for np in nps}
    for npn, np in nps.items():
        categories.add(np.category)
    return nps, categories
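
To make the parsing concrete, a Systematic block in the style the code above expects could look like the following; this is a reconstruction from the parsing logic (blank-line separated blocks, two-space indented options), not an excerpt from a real TRExFitter configuration.

Systematic: "JES_NP1"
  Title: "Jet energy scale NP 1"
  Category: "Jets"

After the "\n  " to "\n" replacement, such a block parses as plain YAML and provides the Systematic, Title, and Category keys used to build each NuisPar.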
Example #7
def read_raw_words(
        words_path: pathlib.PosixPath = DEFAULT_WORDS_PATH) -> List[str]:
    if not words_path.exists():
        raise FileNotFoundError(
            f'File at path {words_path} doesn\'t seem to exist.')
    with words_path.open() as f:
        lines = [x.strip() for x in f.readlines()]
    return lines
Example #8
    def _load_bare_config(self):
        log.debug("Loading configuration file: %s", self.__resolved_config_file)
        config_file_path = PosixPath(self.__resolved_config_file)
        if config_file_path.exists():
            with config_file_path.open() as f:
                self.__bare_config_dict = json.load(f)
        else:
            self.__bare_config_dict = {}
Example #9
def run_agent(conf, log):
    """ Start the agent in the background.

    This command starts the agent in the background after it initiates
    the logger to use a log file and loads the configuration file. The
    agent runs indefinitively until it's explicitly terminated.

    The --conf and --log flags allow to change the location (and name)
    of the configuration and log files. By default, it uses
    'yorokobi.conf' and 'yorokobi.log' located in the current working
    directory. The log file is created if it doesn't exist. If the
    configuration file doens't exist, it uses the default configuration
    values.

    The agent fails to start if an error occurs during reading the
    configuration file, if the log file can't be created or if the
    connection with the backup server can't be etablished.
    """

    config_filename = PosixPath(conf)
    log_filename = PosixPath(log)

    # load the configuration file (use default configuration values if
    # it doesn't exist)
    if config_filename.exists():
        config_file = config_filename.open('r')
        config = load_configuration(config_file)
        config_file.close()
    else:
        config = get_default_configuration()
        config_file = config_filename.open('w+')
        save_configuration(config_file, config)
        config_file.close()

    # print startup message
    print("The Yorokobi agent is starting.", end="\n\n")
    print("Configuration file is '{0}'".format(str(config_filename)))
    print("Log file is '{0}'".format(str(log_filename)), end="\n\n")

    # create the agent instance and start its main loop
    agent = Agent(config, config_filename, log_filename)
    agent.run()

    # print 'successfully stopped agent' message
    print('The Yorokobi agent has successfully stopped')
Example #10
def _css_to_vue(target_file: pathlib.PosixPath) -> None:
    written = pathlib.Path(f'./_dest/{target_file}.vue')
    written.parent.mkdir(parents=True, exist_ok=True)
    with target_file.open() as r:
        with written.open(mode='w') as w:
            w.write('<template>\n' '  <div />\n' '</template>\n' '<style>\n')
            for line in r.readlines():
                w.write(line)
            w.write('</style>\n')
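
To make the transformation concrete: a hypothetical input file styles.css containing only "body { margin: 0; }" would be written to ./_dest/styles.css.vue with roughly this content.

<template>
  <div />
</template>
<style>
body { margin: 0; }
</style>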
Example #11
def read_config(cfg_path: PosixPath) -> Dict:
    if cfg_path.exists():
        with cfg_path.open('r') as fp:
            yaml = YAML(typ="safe")
            cfg = yaml.load(fp)
    else:
        raise FileNotFoundError(cfg_path)

    cfg = parse_config(cfg)

    return cfg
Example #12
def default_to_gpt2_format(tokenizer, fromFile: pathlib.PosixPath,
                           toFile: pathlib.PosixPath) -> None:
    lst_input_ids = []
    with fromFile.open('rb') as fromF:
        lst_dlgs = pickle.load(fromF)
        for dlg in lst_dlgs:
            assert len(dlg['user']) == len(dlg['bot'])
            # persona is not used, so it is ignored
            history = '<BOS>'
            for i, (u_str, b_str) in enumerate(zip(dlg['user'], dlg['bot'])):
                seq = " ".join([history, u_str, '<SEP>', b_str, '<EOS>'])
                history = " ".join([history, u_str, b_str])
                try:
                    idx = dlg['bot_idx'].index(i)
                    history = " ".join(
                        [history, " ".join(dlg['api_call_result'][idx])])
                except ValueError:
                    pass
                seq_ids = tokenizer(seq,
                                    return_length=True,
                                    return_token_type_ids=False,
                                    return_attention_mask=False)
                if seq_ids.length <=\
                        tokenizer.max_model_input_sizes['distilgpt2']:
                    # NOTE "copy.deepcopy" is not needed below
                    lst_input_ids.append(seq_ids['input_ids'])
                else:
                    # (1) Look at future turns to find what sequences of
                    # "api_call_result" are relevant; Remove all except those
                    # relevant sequences and a few irrelevant ones; (2) If
                    # point 1 doesn't work then remove all sequences in
                    # "api_call_result"; (3) If pt 2 doesn't work then remove
                    # the early turns involving both the user and the bot;
                    # Nothing is implemented yet except the seq_ids is NOT
                    # added to lst_input_ids
                    pass
    with toFile.open('wb') as toF:
        pickle.dump(lst_input_ids, toF, protocol=pickle.HIGHEST_PROTOCOL)
        logg.info(f'Done writing to file {toFile}')
Example #13
def _html_to_vue(target_file: pathlib.PosixPath) -> None:
    written = pathlib.Path(f'./_dest/{target_file}.vue')
    written.parent.mkdir(parents=True, exist_ok=True)
    with target_file.open() as r:
        with written.open(mode='w') as w:
            w.write('<template>\n')
            for line in r.readlines():
                if '<!doctype html>' in line:
                    continue
                if line == '\n':
                    w.write(line)
                    continue
                w.write(f'  {line}')
            w.write('</template>\n')
Example #14
def read_file_content(path: PosixPath):
    code_list = []
    with path.open() as f:
        contents = f.readlines()
        for content in contents:
            content = content.rstrip('\n')
            if content.startswith(f'My{path.stem}'):
                # if content.startswith(f'MyDreamFactory'):
                #     share_code = content.split("'")[1]
                # else:
                share_code = content.split('=', maxsplit=1)[1].strip("'")

                if share_code:
                    code_list.append(share_code)
    return '&'.join(code_list)
Example #15
def highest_seat_ID(path_to_passes: pathlib.PosixPath):
    if not path_to_passes.exists():
        raise FileNotFoundError('No file exists at the path provided.')

    with path_to_passes.open('r') as f:
        passes = f.read().split('\n')

    highest_id = 0
    for boarding_pass in passes:
        data = decode_binary_boarding(boarding_pass)
        highest_id = max(data[-1], highest_id)

    print(f'The highest seat ID in the data is: {highest_id}')

    return highest_id
Example #16
def img_format(image_path: PosixPath) -> Optional[str]:
    """
    Checks the file signature (magic number)
    for an image

    :param image_path: The path to the image
    :return: 'JPG', 'PNG' or 'GIF' if the signature matches, otherwise None
    """

    signatures = {'JPG': 'ffd8ff', 'PNG': '89504e', 'GIF': '474946'}

    with image_path.open('rb') as img_file:
        signature = img_file.read(3).hex()
        for sig in signatures:
            if signature == signatures[sig]:
                return sig
    return None
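
A quick usage sketch (the file name is hypothetical; it assumes only the three signatures in the table above):

from pathlib import PosixPath

fmt = img_format(PosixPath('photo.jpg'))
if fmt is None:
    print('not a recognised JPG/PNG/GIF file')
else:
    print(f'detected {fmt}')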
Example #17
@contextlib.contextmanager  # the generator below is meant to be used as a "with" context manager
def lock(path: pathlib.PosixPath):
    """Creates a lock file,
    a file protected from being used by other processes.
    (The lock file isn't the same as the file of the passed path.)

    Args:
        path (pathlib.PosixPath): path to the file
    """
    path = path.with_suffix('.lock')

    if not path.exists():
        path.touch()

    with path.open("r+") as lock_file:
        try:
            fcntl.lockf(lock_file.fileno(), fcntl.LOCK_EX)
            yield lock_file
        finally:
            fcntl.lockf(lock_file.fileno(), fcntl.LOCK_UN)
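
A minimal usage sketch (the file name is hypothetical; it relies on the contextlib.contextmanager decorator noted above):

import pathlib

with lock(pathlib.PosixPath('data.json')) as lock_file:
    # an exclusive advisory lock on data.json.lock is held inside this block
    ...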
Example #18
    def __add_to_file(self, path_to_file: pathlib.PosixPath,
                      template_dict: dict, template_name: str):
        """Add to a file from a template stored in the templates directory.

        Args:
            path_to_file (pathlib.PosixPath):\
                path to the file that needs to be updated.
            template_dict (dict):\
                used to customise parts of the template. The variable names \
                matching a key in the dict will be replaced with the \
                respective value.
            template_name (str):\
                template_name of the template file in the templates directory.

        Raises:
            TypeError: if the path input is not to a file.
            FileNotFoundError: if the path input does not exist.
            FileNotFoundError: if the project directory does not exist.
            FileNotFoundError: if the template file does not exist.
        """
        if not path_to_file.exists():
            raise FileNotFoundError(f'{path_to_file} does not exist.')
        elif not path_to_file.is_file():
            raise TypeError('Please input path to a file.')
        elif self.proj_dir is None or not self.proj_dir.exists():
            raise FileNotFoundError('You need to create a project directory.')

        if template_name is None:
            template_name = path_to_file.name + '.template'

        path_temp = Path.cwd() / 'templates' / template_name

        if not path_temp.exists():
            raise FileNotFoundError(f'No {template_name} file template was'
                                    ' found in the current directory.')

        with path_temp.open('r') as template_file:
            template_str = template_file.read()
        template = Template(template_str)

        write_to_file = template.render(template_dict)

        with path_to_file.open('w') as main:
            main.write(write_to_file)
Example #19
class DS18B20Sensor(AbstractTemperatureSensor):
    """
    Class to read the sensor DS18B20 on the one wire bus.

    To find the sensor id, make sure the one wire bus is enabled, the device is connected, and
    the w1_therm module is loaded. Then look in /sys/bus/w1/devices for a directory starting
    with 28-*. Record that name; that is the device id. The id will be different for each
    sensor on the w1 bus.

    """
    def __init__(self, temperature_config):
        super().__init__(temperature_config)
        self.property_bus = "w1"
        devicepath = PosixPath("/sys/bus/w1/devices").joinpath(
            temperature_config.device)
        self.temperature_path = PosixPath(devicepath.joinpath("w1_slave"))
        # Make sure they exist
        if (not self.temperature_path.exists()
                or not self.temperature_path.is_file()):
            raise DeviceError(self.temperature_path)

    @property
    def temperature(self):
        with self.temperature_path.open() as f:
            f.readline()
            line = f.readline()

        data = line.split('=')
        result = int(data[1]) / 1000
        if (self.property_sensor_config.rounding != -1):
            result = round(result, self.property_sensor_config.rounding)
        return result

    @property
    def pressure(self):
        return 0

    @property
    def bus(self):
        return self.property_bus
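
For context, the w1_slave file that the temperature property parses typically contains two lines of roughly this shape (the values are illustrative):

4b 46 7f ff 0c 10 e9 : crc=e9 YES
4b 46 7f ff 0c 10 e9 t=23187

The first line reports the CRC status and is skipped by the code above; the second line ends in t=<millidegrees Celsius>, which is split on '=' and divided by 1000.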
Example #20
def run_pulls(args):
    """Given command line arguments generate pull plots

    Parameters
    ----------
    args : argparse.Namespace
        parsed command line arguments

    """
    systematics, categories = get_blank_systematics(args.config)
    fit_name = PosixPath(args.workspace).stem
    fit_result = PosixPath(f"{args.workspace}/Fits/{fit_name}.txt")
    np_by_cat = {c: [] for c in categories}
    with fit_result.open("r") as f:
        lines = f.read().split("CORRELATION_MATRIX")[0].strip()
        for line in lines.split("\n")[2:-1]:
            if line.startswith("gamma"):
                continue
            elements = line.split()
            systematics[elements[0]].mean = float(elements[1])
            systematics[elements[0]].plus = float(elements[2])
            systematics[elements[0]].minus = float(elements[3])
            np_by_cat[systematics[elements[0]].category].append(
                systematics[elements[0]])

    if args.out_dir is None:
        outd = f"{args.workspace}/MPL"
    else:
        outd = args.out_dir
    if outd != ".":
        PosixPath(outd).mkdir(parents=True, exist_ok=True)

    for category, nps in np_by_cat.items():
        fig, ax = draw_pulls(args, nps)
        out_name = f"{outd}/pulls_{category}.pdf"
        fig.savefig(out_name, bbox_inches="tight")
        if args.shrink:
            shrink_pdf(out_name)
        log.info(f"Done with {category}")
Example #21
def find(warn=True):
    """Search for a pavilion.yaml configuration file. Use the one pointed
to by the PAV_CONFIG_FILE environment variable. Otherwise, use the first
found in these directories the default config search paths:

- The ~/.pavilion directory
- The Pavilion source directory (don't put your config here).
"""

    if PAV_CONFIG_FILE is not None:
        pav_cfg_file = PosixPath(Path(PAV_CONFIG_FILE))
        # pylint has a bug that pops up occasionally with pathlib.
        if pav_cfg_file.is_file():  # pylint: disable=no-member
            try:
                cfg = PavilionConfigLoader().load(pav_cfg_file.open())  # pylint: disable=no-member
                cfg.pav_cfg_file = pav_cfg_file
                return cfg
            except Exception as err:
                raise RuntimeError("Error in Pavilion config at {}: {}".format(
                    pav_cfg_file, err))

    for config_dir in PAV_CONFIG_SEARCH_DIRS:
        path = config_dir / 'pavilion.yaml'
        if path.is_file():  # pylint: disable=no-member
            try:
                # Parse and load the configuration.
                cfg = PavilionConfigLoader().load(path.open())  # pylint: disable=no-member
                cfg.pav_cfg_file = path
                return cfg
            except Exception as err:
                raise RuntimeError("Error in Pavilion config at {}: {}".format(
                    path, err))

    if warn:
        LOGGER.warning("Could not find a pavilion config file. Using an "
                       "empty/default config.")
    return PavilionConfigLoader().load_empty()
Example #22
def default_to_gpt2_format(tokenizer, fromFile: pathlib.PosixPath,
                           toFile: pathlib.PosixPath, labels_max_len: int,
                           turns_meta_file_test: pathlib.PosixPath,
                           dlgs_meta_file_test: pathlib.PosixPath) -> int:
    lst_input_ids = []
    num_dlgs = 0
    dlgs_meta = []
    turns_meta = []
    test_file = fromFile.suffix == '.test'
    with fromFile.open('rb') as fromF:
        lst_dlgs = pickle.load(fromF)
    for dlg in lst_dlgs:
        num_dlgs += 1
        assert len(dlg['user']) == len(dlg['bot'])
        if test_file:
            # metadata: (1) line # from the original dataset file where
            # this dlg started, (2) index of first turn in dlg
            dlgs_meta.append({
                'lineno': copy.deepcopy(dlg['dlg_start_lineno']),
                'idx_first_trn': len(lst_input_ids)
            })
        # persona is not used, so it is ignored
        history = ''
        max_gpt2_len = tokenizer.max_model_input_sizes[
            'distilgpt2'] - labels_max_len - 3
        for i, (u_str, b_str) in enumerate(zip(dlg['user'], dlg['bot'])):
            feature_ids = tokenizer(" ".join([history, u_str]),
                                    return_length=False,
                                    return_token_type_ids=False,
                                    return_attention_mask=False)

            if test_file:
                # metadata: (1) user part of input string; (2) if input
                # is truncated then (i) part of string that is not
                # truncated, (2) part of string that is truncated
                untrunc_part_inp = trunc_part_inp = None
                if (len(feature_ids['input_ids']) - max_gpt2_len) > 0:
                    untrunc_part_inp = tokenizer.decode(
                        feature_ids['input_ids'][-max_gpt2_len:])
                    trunc_part_inp = tokenizer.decode(
                        feature_ids['input_ids'][:-max_gpt2_len])
                turns_meta.append(
                    copy.deepcopy({
                        'u_str':
                        u_str,
                        'truncation': (untrunc_part_inp, trunc_part_inp)
                        if untrunc_part_inp is not None else None
                    }))

            label_ids = tokenizer(b_str,
                                  return_length=False,
                                  return_token_type_ids=False,
                                  return_attention_mask=False)
            # Policy: If feature_ids is larger than max allowed by GPT2,
            # then truncate by dropping the tokens in the beginning
            feature_ids_trunc = feature_ids['input_ids'][-max_gpt2_len:]
            lst_input_ids.append(
                copy.deepcopy([tokenizer.bos_token_id] + feature_ids_trunc +
                              [tokenizer.sep_token_id] +
                              label_ids['input_ids'] +
                              [tokenizer.eos_token_id]))
            try:
                idx = dlg['bot_idx'].index(i)
                history = " ".join([
                    history, u_str, b_str,
                    " ".join(dlg['api_call_result'][idx])
                ])
            except ValueError:
                history = " ".join([history, u_str, b_str])
    # last entry is not a new dlg but the previous dlg with the same
    # line # but index of last turn plus 1
    if test_file:
        dlgs_meta.append({
            'lineno': copy.deepcopy(dlg['dlg_start_lineno']),
            'idx_first_trn': len(lst_input_ids)
        })

    with toFile.open('wb') as toF:
        pickle.dump(lst_input_ids, toF, protocol=pickle.HIGHEST_PROTOCOL)
        logg.info(f'Done writing to file {toFile}')
    if test_file:
        with dlgs_meta_file_test.open('wb') as dF, turns_meta_file_test.open(
                'wb') as tF:
            pickle.dump(dlgs_meta, dF, protocol=pickle.HIGHEST_PROTOCOL)
            logg.info(f'Done writing to file {dlgs_meta_file_test}')
            pickle.dump(turns_meta, tF, protocol=pickle.HIGHEST_PROTOCOL)
            logg.info(f'Done writing to file {turns_meta_file_test}')
    return num_dlgs
Example #23
def create_h5_file(basins: List,
                   cfg: Dict,
                   h5_file: PosixPath,
                   scaler_file: PosixPath,
                   additional_features: List[Dict] = []):

    if h5_file.is_file():
        raise FileExistsError(f"File already exists at {h5_file}")

    if cfg.get("camels_attributes", []):
        attributes_sanity_check(data_dir=cfg["data_dir"],
                                dataset=cfg["dataset"],
                                basins=basins,
                                attribute_list=cfg.get("camels_attributes",
                                                       []))

    n_dyn_inputs = len(cfg["dynamic_inputs"])
    n_targets = len(cfg["target_variable"])
    # we only store user-defined additional static features provided in the additional_features table
    n_stat = len(cfg["static_inputs"])

    with h5py.File(h5_file, 'w') as out_f:
        dyn_input_data = out_f.create_dataset(
            'dynamic_inputs',
            shape=(0, cfg["seq_length"], n_dyn_inputs),
            maxshape=(None, cfg["seq_length"], n_dyn_inputs),
            chunks=True,
            dtype=np.float32,
            compression='gzip')
        if n_stat > 0:
            stat_input_data = out_f.create_dataset('static_inputs',
                                                   shape=(0, n_stat),
                                                   maxshape=(None, n_stat),
                                                   chunks=True,
                                                   dtype=np.float32,
                                                   compression='gzip')
        target_data = out_f.create_dataset('target_data',
                                           shape=(0, cfg["seq_length"],
                                                  n_targets),
                                           maxshape=(None, cfg["seq_length"],
                                                     n_targets),
                                           chunks=True,
                                           dtype=np.float32,
                                           compression='gzip')
        q_stds = out_f.create_dataset('q_stds',
                                      shape=(0, 1),
                                      maxshape=(None, 1),
                                      dtype=np.float32,
                                      compression='gzip',
                                      chunks=True)
        sample_2_basin = out_f.create_dataset('sample_2_basin',
                                              shape=(0, ),
                                              maxshape=(None, ),
                                              dtype="S11",
                                              compression='gzip',
                                              chunks=True)

        scalers = {
            'dyn_mean': np.zeros(n_dyn_inputs),
            'dyn_std': np.zeros(n_dyn_inputs),
            'target_mean': np.zeros(n_targets),
            'target_std': np.zeros(n_targets)
        }
        total_samples = 0

        basins_without_train_data = []

        for basin in tqdm(basins, file=sys.stdout):

            if additional_features:
                add_features = [d[basin] for d in additional_features]
            else:
                add_features = []

            try:
                dataset = get_basin_dataset(basin=basin,
                                            cfg=cfg,
                                            mode="train",
                                            additional_features=add_features)
            except NoTrainDataError as error:
                # skip basin
                basins_without_train_data.append(basin)
                continue

            num_samples = len(dataset)
            total_samples = dyn_input_data.shape[0] + num_samples

            basin_scaler = dataset.get_scaler()

            scalers["dyn_mean"] += num_samples * basin_scaler["dyn_mean"]
            scalers["dyn_std"] += num_samples * basin_scaler["dyn_std"]
            scalers["target_mean"] += num_samples * basin_scaler["target_mean"]
            scalers["target_std"] += num_samples * basin_scaler["target_std"]

            # store input and output samples
            dyn_input_data.resize(
                (total_samples, cfg["seq_length"], n_dyn_inputs))
            dyn_input_data[-num_samples:, :, :] = dataset.x_d.numpy()

            target_data.resize((total_samples, cfg["seq_length"], n_targets))
            target_data[-num_samples:, :, :] = dataset.y.numpy()

            if n_stat > 0:
                x_stat = dataset.x_s.numpy()
                stat_input_data.resize((total_samples, n_stat))
                # the non-CAMELS stat features are stored at the end of the combined features
                stat_input_data[-num_samples:, :] = x_stat[:, -n_stat:]

            # additionally store std of discharge of this basin for each sample
            q_stds.resize((total_samples, 1))
            q_std_array = np.array([dataset.q_std] * num_samples,
                                   dtype=np.float32).reshape(-1, 1)
            q_stds[-num_samples:, :] = q_std_array

            sample_2_basin.resize((total_samples, ))
            str_arr = np.array([basin.encode("ascii", "ignore")] * num_samples)
            sample_2_basin[-num_samples:] = str_arr

            out_f.flush()

    if basins_without_train_data:
        print(
            "### The following basins were skipped, since they don't have discharge observations in the train period"
        )
        print(basins_without_train_data)

    for key in scalers:
        scalers[key] /= total_samples

    if n_stat > 0:
        with h5py.File(h5_file, 'r') as f:
            scalers["stat_mean"] = f["static_inputs"][:].mean(axis=0)
            scalers["stat_std"] = f["static_inputs"][:].std(axis=0)

    if cfg.get("camels_attributes", []):
        attr_means, attr_stds = get_camels_scaler(
            data_dir=cfg["data_dir"],
            basins=basins,
            attributes=cfg["camels_attributes"])
        scalers["camels_attr_mean"] = attr_means
        scalers["camels_attr_std"] = attr_stds

    # sanity check that no std for any feature is 0, which results in NaN values during training
    problems_in_feature_std = []
    for k, v in scalers.items():
        # skip attributes, which were already tested above
        if k.endswith('_std') and ('attr' not in k):
            if any(v == 0) or any(np.isnan(v)):
                problems_in_feature_std.append(
                    (k, list(np.argwhere(np.isnan(v) | (v == 0)).flatten())))
    if problems_in_feature_std:
        print(
            "### ERROR: Zero or NaN std encountered in the following features."
        )
        for k, pos in problems_in_feature_std:
            print(f"In scaler for {k} at position {pos}")
        raise RuntimeError

    with scaler_file.open("wb") as fp:
        pickle.dump(scalers, fp)

    # already normalize all data, so we don't have to do this while training
    with h5py.File(h5_file, 'r+') as f:
        print(
            f"Applying normalization in chunks of {CHUNK_SIZE} using global statistics"
        )
        # perform iteration in chunks, for allowing to run on low memory systems

        n_batches = f["dynamic_inputs"].shape[0] // CHUNK_SIZE
        if f["dynamic_inputs"].shape[0] % CHUNK_SIZE > 0:
            n_batches += 1

        for i in tqdm(range(n_batches), file=sys.stdout):

            start_idx = i * CHUNK_SIZE
            end_idx = (i + 1) * CHUNK_SIZE
            if end_idx > f["dynamic_inputs"].shape[0]:
                slice_obj = slice(start_idx, None)
            else:
                slice_obj = slice(start_idx, end_idx)

            data = f["dynamic_inputs"]
            data[slice_obj] = (data[slice_obj] -
                               scalers["dyn_mean"]) / scalers["dyn_std"]

            data = f["target_data"]
            if cfg.get("zero_center_target", True):
                data[slice_obj] = (data[slice_obj] - scalers["target_mean"]
                                   ) / scalers["target_std"]
            else:
                data[slice_obj] = data[slice_obj] / scalers["target_std"]

            if n_stat > 0:
                data = f["static_inputs"]
                data[slice_obj] = (data[slice_obj] -
                                   scalers["stat_mean"]) / scalers["stat_std"]

            f.flush()
Example #24
class MP3115Sensor(AbstractTemperatureSensor):
    """
    Class to read the sensor MP3115 on the I2C bus

    To find the sensor id, make sure the i2c bus is enabled, the device is connected, and
    the mp3115a2 module is loaded. Additionally, you have to tell the i2c bus to read the chip
    with the

    echo mp3115a2 0x60 > /sys/bus/i2c/devices/i2c-1/new_device

    ...command. The i2c-1 bus number may change if the system has more than one i2c bus.

    Then look in the /sys/bus/i2c/devices directory for the 1-0060 directory.

    The 0x60 above and the 1-0060 refer to the device's i2c address. The address can be
    determined with the i2cdetect command if needed.

    """
    def __init__(self, temperature_config):
        super().__init__(temperature_config)
        self.property_bus = "i2c"
        devicepath = PosixPath("/sys/bus/i2c/devices").joinpath(
            temperature_config.device).joinpath("iio:device0")
        self.temperature_path_raw = PosixPath(
            devicepath.joinpath("in_temp_raw"))
        self.temperature_path_scale = PosixPath(
            devicepath.joinpath("in_temp_scale"))
        self.pressure_path_raw = PosixPath(
            devicepath.joinpath("in_pressure_raw"))
        self.pressure_path_scale = PosixPath(
            devicepath.joinpath("in_pressure_scale"))
        # Make sure they exist
        if (not self.temperature_path_raw.exists()
                or not self.temperature_path_raw.is_file()):
            raise DeviceError(self.temperature_path_raw)
        if (not self.temperature_path_scale.exists()
                or not self.temperature_path_scale.is_file()):
            raise DeviceError(self.temperature_path_scale)
        if (not self.pressure_path_raw.exists()
                or not self.pressure_path_raw.is_file()):
            raise DeviceError(self.pressure_path_raw)
        if (not self.pressure_path_scale.exists()
                or not self.pressure_path_scale.is_file()):
            raise DeviceError(self.pressure_path_scale)

    @property
    def temperature(self):
        with self.temperature_path_raw.open() as f:
            data_raw = f.readline()
        with self.temperature_path_scale.open() as f:
            data_scale = f.readline()
        result = int(data_raw) * float(data_scale)
        if (self.property_sensor_config.rounding != -1):
            result = round(result, self.property_sensor_config.rounding)
        return result

    @property
    def pressure(self):
        with self.pressure_path_raw.open() as f:
            data_raw = f.readline()
        with self.pressure_path_scale.open() as f:
            data_scale = f.readline()
        result = int(data_raw) * float(data_scale) * 10
        if (self.property_sensor_config.rounding != -1):
            result = round(result, self.property_sensor_config.rounding)
        return result

    @property
    def bus(self):
        return self.property_bus
Example #25
def fetch_test_data_file(file: PosixPath):
    with file.open() as f:
        json_file = json.load(f)
    return json_file
Example #26
def file_reader(file: PosixPath) -> Iterator[str]:
    """Yield entries from a file, one stripped line at a time."""
    with file.open() as f:
        for row in f:
            yield row.strip("\n")
Example #27
def _read_file(path: pathlib.PosixPath):
    with path.open() as f:
        text = f.read()
    lines = tuple(text.splitlines())
    return lines
Example #28
def load_basin_file(basin_file: PosixPath) -> List:
    with basin_file.open("r") as fp:
        basins = fp.readlines()
    basins = [basin.strip() for basin in basins]
    return basins