コード例 #1
0
ファイル: read_strategies.py プロジェクト: willu47/otoole
    def extract_set(
        self, datafile_parser, name, config, dict_of_dataframes
    ) -> pd.DataFrame:
        """Build the dataframe holding the members of the set ``name``

        Arguments
        ---------
        datafile_parser
            Parser indexed by symbol name; the ``data`` attribute of
            ``datafile_parser[name]`` supplies the set members
        name : str
            Name of the set to extract
        config : Dict
            ``config[name]["dtype"]`` is the dtype used to build the dataframe
        dict_of_dataframes
            Accepted for interface compatibility; not used by this method

        Returns
        -------
        pandas.DataFrame
            Single ``VALUE`` column, passed through ``check_set_datatype``
        """
        set_members = datafile_parser[name].data

        # A set is stored as one column of member values
        set_frame = pd.DataFrame(
            data=set_members, columns=["VALUE"], dtype=config[name]["dtype"]
        )

        return check_set_datatype(set_frame, config, name)
コード例 #2
0
ファイル: read_strategies.py プロジェクト: willu47/otoole
    def read(
        self, filepath, **kwargs
    ) -> Tuple[Dict[str, pd.DataFrame], Dict[str, Any]]:
        """Read one CSV file per entry of ``self.input_config`` from a folder

        Each entry ``<name>`` is loaded from ``<filepath>/<name>.csv``, passed
        through ``_check_parameter`` or ``_check_set`` according to its
        ``type``, and, when the result is not empty, through the matching
        datatype check.

        Arguments
        ---------
        filepath : str
            Folder containing the CSV files
        **kwargs
            Accepted but not used by this method

        Returns
        -------
        tuple
            The dict of dataframes keyed by entry name (after
            ``self._check_index``) and the default values returned by
            ``self._read_default_values``

        Raises
        ------
        ValueError
            If an entry has a ``type`` other than ``param`` or ``set``
        """

        input_data = {}

        default_values = self._read_default_values(self.input_config)

        for parameter, config_details in self.input_config.items():
            logger.info("Looking for %s", parameter)

            csv_path = os.path.join(filepath, parameter + ".csv")

            try:
                df = pd.read_csv(csv_path)
            except pd.errors.EmptyDataError:
                logger.error("No data found in file for %s", parameter)
                # The set branch below never reads "indices", so do not
                # require the key here; fall back to a bare VALUE column.
                expected_columns = list(config_details.get("indices", []))
                default_columns = expected_columns + ["VALUE"]
                df = pd.DataFrame(columns=default_columns)

            entity_type = config_details["type"]

            if entity_type == "param":
                narrow = self._check_parameter(df, config_details["indices"], parameter)
                if not narrow.empty:
                    narrow_checked = check_datatypes(
                        narrow, self.input_config, parameter
                    )
                else:
                    narrow_checked = narrow
            elif entity_type == "set":
                narrow = self._check_set(df, config_details, parameter)
                if not narrow.empty:
                    narrow_checked = check_set_datatype(
                        narrow, self.input_config, parameter
                    )
                else:
                    narrow_checked = narrow
            else:
                # Fail loudly: falling through would either hit an unbound
                # local or store the previous entry's data under this name.
                raise ValueError(
                    "Unknown type '{}' for {}; expected 'param' or 'set'".format(
                        entity_type, parameter
                    )
                )

            input_data[parameter] = narrow_checked

        input_data = self._check_index(input_data)

        return input_data, default_values
コード例 #3
0
def convert_amply_to_dataframe(datafile_parser,
                               config) -> Dict[str, pd.DataFrame]:
    """Converts an amply parser to a dict of pandas dataframes

    Every name in ``datafile_parser.symbols`` is looked up in ``config`` and
    converted according to its ``type``: a ``param`` becomes a dataframe with
    its configured index columns followed by ``VALUE``; a ``set`` becomes a
    dataframe with a single ``VALUE`` column.

    Arguments
    ---------
    datafile_parser : Amply
    config : Dict
        Maps each symbol name to its details (``type``, plus ``indices`` for
        a param or ``dtype`` for a set). The dict is not modified.

    Returns
    -------
    dict of pandas.DataFrame

    Raises
    ------
    ValueError
        If ``check_datatypes`` raises ``ValueError`` for a parameter; the
        error is re-raised with the parameter name in the message
    """

    dict_of_dataframes = {}

    for name in datafile_parser.symbols.keys():
        logger.debug("Extracting data for %s", name)
        if config[name]["type"] == "param":
            # Build a new list: appending to config's own list would leave
            # "VALUE" in the caller's config and break any later call.
            columns = list(config[name]["indices"]) + ["VALUE"]

            raw_data = datafile_parser[name].data
            data = convert_amply_data_to_list(raw_data)
            df = pd.DataFrame(data=data, columns=columns)
            try:
                dict_of_dataframes[name] = check_datatypes(df, config, name)
            except ValueError as ex:
                msg = "Validation error when checking datatype of {}: {}".format(
                    name, str(ex))
                # Chain the cause so the original traceback is kept
                raise ValueError(msg) from ex
        elif config[name]["type"] == "set":
            data = datafile_parser[name].data
            logger.debug(data)

            df = pd.DataFrame(data=data,
                              columns=["VALUE"],
                              dtype=config[name]["dtype"])
            dict_of_dataframes[name] = check_set_datatype(df, config, name)
        # NOTE: a type other than "param"/"set" stores nothing for ``name``,
        # so the lookup below raises KeyError for such a symbol.
        logger.debug("\n%s\n", dict_of_dataframes[name])

    return dict_of_dataframes
コード例 #4
0
ファイル: read_strategies.py プロジェクト: wogandavid/otoole
    def read(self, filepath: Union[str, TextIO],
             **kwargs) -> Tuple[Dict[str, pd.DataFrame], Dict[str, Any]]:
        """Read every sheet of an Excel workbook into a dict of dataframes

        Each sheet name is translated through ``EXCEL_TO_CSV`` when it has an
        entry there (otherwise it is used unchanged), looked up in
        ``self.input_config``, and passed through ``_check_parameter`` or
        ``_check_set`` according to its ``type``. Non-empty results are also
        passed through the matching datatype check.

        Arguments
        ---------
        filepath : str or TextIO
            Workbook to open with ``pandas.ExcelFile``
        **kwargs
            Accepted but not used by this method

        Returns
        -------
        tuple
            The dict of dataframes keyed by translated sheet name (after
            ``self._check_index``) and the default values returned by
            ``self._read_default_values``

        Raises
        ------
        KeyError
            If a (translated) sheet name has no entry in the config
        ValueError
            If an entry has a ``type`` other than ``param`` or ``set``
        """

        config = self.input_config
        default_values = self._read_default_values(config)

        input_data = {}

        # The context manager closes the workbook even if a sheet fails
        with pd.ExcelFile(filepath) as xl:

            for name in xl.sheet_names:

                try:
                    mod_name = EXCEL_TO_CSV[name]
                except KeyError:
                    mod_name = name

                config_details = config[mod_name]

                df = xl.parse(name)

                entity_type = config_details["type"]

                if entity_type == "param":
                    narrow = self._check_parameter(df, config_details, mod_name)
                    if not narrow.empty:
                        narrow_checked = check_datatypes(narrow, config, mod_name)
                    else:
                        narrow_checked = narrow
                elif entity_type == "set":
                    narrow = self._check_set(df, config_details, mod_name)
                    if not narrow.empty:
                        narrow_checked = check_set_datatype(
                            narrow, config, mod_name)
                    else:
                        narrow_checked = narrow
                else:
                    # Fail loudly: falling through would either hit an unbound
                    # local or store the previous sheet's data under this name.
                    raise ValueError(
                        "Unknown type '{}' for {}; expected 'param' or 'set'".format(
                            entity_type, mod_name))

                input_data[mod_name] = narrow_checked

        input_data = self._check_index(input_data)

        return input_data, default_values