Example #1
    def convert_df(self, data: List[List[str]], variable: str, start_year: int,
                   end_year: int) -> pd.DataFrame:
        """Read the cplex lines into a pandas DataFrame"""
        index = self.results_config[variable]["indices"]
        columns = ["variable"] + index[:-1] + list(
            range(start_year, end_year + 1, 1))
        df = pd.DataFrame(data=data, columns=columns)
        df, index = check_duplicate_index(df, columns, index)
        df = df.drop(columns="variable")

        LOGGER.debug(
            f"Attempting to set index for {variable} with columns {index[:-1]}"
        )
        try:
            df = df.set_index(index[:-1])
        except NotImplementedError as ex:
            LOGGER.error(f"Error setting index for {df.head()}")
            raise NotImplementedError(ex)
        df = df.melt(var_name="YEAR", value_name="VALUE", ignore_index=False)
        df = df.reset_index()
        df = check_datatypes(df, {
            **self.input_config,
            **self.results_config
        }, variable)
        df = df.set_index(index)
        df = df[(df != 0).any(axis=1)]
        return df
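
The wide-to-long reshaping in convert_df can be illustrated in isolation. The sketch below is a minimal stand-alone version of that step, using invented dimension names and toy data rather than the project's configuration; check_duplicate_index and check_datatypes are left out.

import pandas as pd

# Hypothetical wide-format data: one row per (REGION, TECHNOLOGY), one column per year
wide = pd.DataFrame(
    data=[["R1", "TECH1", 1.0, 0.0],
          ["R1", "TECH2", 0.0, 0.0]],
    columns=["REGION", "TECHNOLOGY", 2015, 2016],
)

# Index on the dimension columns, melt the year columns into YEAR/VALUE pairs,
# then restore a flat frame, mirroring the set_index/melt/reset_index calls above
long = (
    wide.set_index(["REGION", "TECHNOLOGY"])
    .melt(var_name="YEAR", value_name="VALUE", ignore_index=False)
    .reset_index()
)

# Re-index on all dimensions and drop rows whose remaining values are all zero,
# as in df[(df != 0).any(axis=1)]
long = long.set_index(["REGION", "TECHNOLOGY", "YEAR"])
long = long[(long != 0).any(axis=1)]
print(long)  # only the (R1, TECH1, 2015) row with VALUE 1.0 survives
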
Example #2
    def extract_param(self, config, name, datafile_parser,
                      dict_of_dataframes) -> pd.DataFrame:
        indices = config[name]["indices"].copy()
        indices_dtypes = [config[index]["dtype"] for index in indices]
        indices.append("VALUE")
        indices_dtypes.append("float")

        raw_data = datafile_parser[name].data
        data = self._convert_amply_data_to_list(raw_data)
        df = pd.DataFrame(data=data, columns=indices)
        try:
            return check_datatypes(df, config, name)
        except ValueError as ex:
            msg = "Validation error when checking datatype of {}: {}".format(
                name, str(ex))
            raise ValueError(msg)
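
check_datatypes is a project helper that is not shown here. A rough stand-in for the surrounding pattern, using a plain astype cast with invented column names and dtypes, looks like this:

import pandas as pd

name = "AccumulatedAnnualDemand"   # hypothetical parameter name
data = [["R1", "FUEL1", 2015, "5.0"],
        ["R1", "FUEL1", 2016, "7.5"]]
columns = ["REGION", "FUEL", "YEAR", "VALUE"]
dtypes = {"REGION": str, "FUEL": str, "YEAR": int, "VALUE": float}

df = pd.DataFrame(data=data, columns=columns)
try:
    df = df.astype(dtypes)  # stand-in for check_datatypes(df, config, name)
except ValueError as ex:
    # Attach the parameter name so the offending parameter is easy to spot,
    # as extract_param does above
    raise ValueError(
        "Validation error when checking datatype of {}: {}".format(name, ex)
    )
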
Example #3
    def read(
        self, filepath, **kwargs
    ) -> Tuple[Dict[str, pd.DataFrame], Dict[str, Any]]:

        input_data = {}

        default_values = self._read_default_values(self.input_config)

        for parameter, details in self.input_config.items():
            logger.info("Looking for %s", parameter)
            config_details = self.input_config[parameter]

            csv_path = os.path.join(filepath, parameter + ".csv")

            try:
                df = pd.read_csv(csv_path)
            except pd.errors.EmptyDataError:
                logger.error("No data found in file for %s", parameter)
                expected_columns = config_details["indices"]
                default_columns = expected_columns + ["VALUE"]
                df = pd.DataFrame(columns=default_columns)

            entity_type = self.input_config[parameter]["type"]

            if entity_type == "param":
                narrow = self._check_parameter(df, config_details["indices"], parameter)
                if not narrow.empty:
                    narrow_checked = check_datatypes(
                        narrow, self.input_config, parameter
                    )
                else:
                    narrow_checked = narrow
            elif entity_type == "set":
                narrow = self._check_set(df, config_details, parameter)
                if not narrow.empty:
                    narrow_checked = check_set_datatype(
                        narrow, self.input_config, parameter
                    )
                else:
                    narrow_checked = narrow

            input_data[parameter] = narrow_checked

        input_data = self._check_index(input_data)

        return input_data, default_values
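
The EmptyDataError branch above can be exercised on its own. The sketch below writes an empty CSV first so it runs stand-alone; the directory, parameter name, and index columns are invented.

import os
import pandas as pd

os.makedirs("data", exist_ok=True)
parameter = "AnnualDemand"                       # hypothetical parameter name
csv_path = os.path.join("data", parameter + ".csv")
open(csv_path, "w").close()                      # an empty file, as the except branch expects

expected_columns = ["REGION", "FUEL", "YEAR"]    # hypothetical index columns
try:
    df = pd.read_csv(csv_path)
except pd.errors.EmptyDataError:
    # Fall back to an empty frame that still carries the expected headers
    df = pd.DataFrame(columns=expected_columns + ["VALUE"])

print(df.columns.tolist())
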
Example #4
def convert_amply_to_dataframe(datafile_parser,
                               config) -> Dict[str, pd.DataFrame]:
    """Converts an amply parser to a dict of pandas dataframes

    Arguments
    ---------
    datafile_parser : Amply
    config : Dict

    Returns
    -------
    dict of pandas.DataFrame
    """

    dict_of_dataframes = {}

    for name in datafile_parser.symbols.keys():
        logger.debug("Extracting data for %s", name)
        if config[name]["type"] == "param":
            indices = config[name]["indices"]
            indices_dtypes = [config[index]["dtype"] for index in indices]
            indices.append("VALUE")
            indices_dtypes.append("float")

            raw_data = datafile_parser[name].data
            data = convert_amply_data_to_list(raw_data)
            df = pd.DataFrame(data=data, columns=indices)
            try:
                dict_of_dataframes[name] = check_datatypes(df, config, name)
            except ValueError as ex:
                msg = "Validation error when checking datatype of {}: {}".format(
                    name, str(ex))
                raise ValueError(msg)
        elif config[name]["type"] == "set":
            data = datafile_parser[name].data
            logger.debug(data)

            indices = ["VALUE"]
            df = pd.DataFrame(data=data,
                              columns=indices,
                              dtype=config[name]["dtype"])
            dict_of_dataframes[name] = check_set_datatype(df, config, name)
        logger.debug("\n%s\n", dict_of_dataframes[name])

    return dict_of_dataframes
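
For context, the parser handed to this function comes from the amply package. The sketch below assumes the same access pattern the function relies on (symbols and [name].data); the set name is invented and the config dict contains only the keys the function actually reads.

from amply import Amply

datafile_parser = Amply("set REGION := R1 R2;")        # tiny AMPL-format datafile
config = {"REGION": {"type": "set", "dtype": "str"}}   # only the keys read above

for name in datafile_parser.symbols.keys():
    # convert_amply_to_dataframe branches on config[name]["type"]
    # and wraps this raw data in a pandas DataFrame
    print(name, datafile_parser[name].data)
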
Example #5
    def read(self, filepath: Union[str, TextIO],
             **kwargs) -> Tuple[Dict[str, pd.DataFrame], Dict[str, Any]]:

        config = self.input_config
        default_values = self._read_default_values(config)

        xl = pd.ExcelFile(filepath)

        input_data = {}

        for name in xl.sheet_names:

            try:
                mod_name = EXCEL_TO_CSV[name]
            except KeyError:
                mod_name = name

            config_details = config[mod_name]

            df = xl.parse(name)

            entity_type = config[mod_name]["type"]

            if entity_type == "param":
                narrow = self._check_parameter(df, config_details, mod_name)
                if not narrow.empty:
                    narrow_checked = check_datatypes(narrow, config, mod_name)
                else:
                    narrow_checked = narrow
            elif entity_type == "set":
                narrow = self._check_set(df, config_details, mod_name)
                if not narrow.empty:
                    narrow_checked = check_set_datatype(
                        narrow, config, mod_name)
                else:
                    narrow_checked = narrow

            input_data[mod_name] = narrow_checked

        input_data = self._check_index(input_data)

        return input_data, default_values
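
The per-sheet loop can be reproduced without the surrounding class. The sketch below first writes a one-sheet workbook so it runs stand-alone (writing requires an Excel engine such as openpyxl); the sheet name and contents are invented, and the EXCEL_TO_CSV renaming and validation steps are left out.

import pandas as pd

# Build a tiny workbook to read back
with pd.ExcelWriter("model.xlsx") as writer:
    pd.DataFrame({"VALUE": ["R1", "R2"]}).to_excel(writer, sheet_name="REGION", index=False)

xl = pd.ExcelFile("model.xlsx")
for name in xl.sheet_names:      # one sheet per set or parameter, as in read() above
    df = xl.parse(name)          # same call the reader uses for each sheet
    print(name, df.shape)
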
Example #6
    def convert_df(self, row_as_list: List, start_year: int,
                   end_year: int) -> Tuple[str, pd.DataFrame]:
        """Read the cplex line into a pandas DataFrame

        """
        variable, dimensions, values = self.extract_variable_dimensions_values(
            row_as_list)
        index = self.results_config[variable]["indices"]
        columns = index[:-1] + list(range(start_year, end_year + 1, 1))
        df = pd.DataFrame(data=[list(dimensions) + values],
                          columns=columns).set_index(index[:-1])
        df = df.melt(var_name="YEAR", value_name="VALUE", ignore_index=False)
        df = df.reset_index()
        df = check_datatypes(df, {
            **self.input_config,
            **self.results_config
        }, variable)
        df = df.set_index(index)
        df = df[(df != 0).any(axis=1)]
        return (variable, df)
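
The single-row construction at the start of this method can be sketched with invented values: the columns are the non-YEAR indices followed by one column per model year, and the data row is the dimension values followed by the yearly values.

import pandas as pd

variable = "NewCapacity"                    # illustrative variable name
dimensions = ("R1", "TECH1")                # illustrative dimension values
values = [0.0, 2.5]                         # one value per year
index = ["REGION", "TECHNOLOGY", "YEAR"]    # last entry is treated as the YEAR dimension
start_year, end_year = 2015, 2016

columns = index[:-1] + list(range(start_year, end_year + 1))
df = pd.DataFrame(data=[list(dimensions) + values],
                  columns=columns).set_index(index[:-1])

result = (variable, df)            # mirrors the (variable, DataFrame) pair returned above
print(result[0], result[1].shape)  # one row, indexed by (REGION, TECHNOLOGY)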