Example #1
def _build():
    """Utility function for building a headless Selenium Chrome driver"""
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    try:
        load_dotenv(Path(get_project_root(), ".env"))
        chrome_options.binary_location = os.environ.get("GOOGLE_CHROME_PATH")
        return webdriver.Chrome(
            executable_path=os.environ.get("CHROMEDRIVER_PATH"),
            options=chrome_options)
    except (WebDriverException, TypeError):
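        # Missing env vars raise TypeError; a broken or mismatched local
        # driver raises WebDriverException. Either way, fall back to
        # auto-installing a matching chromedriver.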
        chromedriver_autoinstaller.install()
        return webdriver.Chrome(options=chrome_options)
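
A minimal usage sketch for the helper above, assuming the module-level
imports it relies on (os, Path, Options, webdriver, WebDriverException,
chromedriver_autoinstaller, load_dotenv, get_project_root) are in scope;
the URL is purely illustrative:

driver = _build()
try:
    # Fetch a page headlessly and read its title.
    driver.get("https://example.com")
    print(driver.title)
finally:
    # Always release the browser process, even on failure.
    driver.quit()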
Example #2
File: income.py  Project: rxavier/econuy
def income_household(update_loc: Union[str, PathLike,
                                       Engine, Connection, None] = None,
                     revise_rows: Union[str, int] = "nodup",
                     save_loc: Union[str, PathLike,
                                     Engine, Connection, None] = None,
                     only_get: bool = False) -> pd.DataFrame:
    """Get average household income.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either a path or path-like string pointing to a directory where the
        CSV for updating can be found, a SQLAlchemy connection or engine
        object, or ``None`` to skip updating.
    revise_rows : {'nodup', 'auto', int}
        Defines how data updates are processed. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. A string can be either ``auto``, which automatically determines
        the number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either a path or path-like string pointing to a directory where the
        CSV will be saved, a SQLAlchemy connection or engine object, or
        ``None`` to skip saving.
    only_get : bool, default False
        If True, don't download data; retrieve whatever is available from
        ``update_loc``.

    Returns
    -------
    Monthly average household income : pd.DataFrame

    """
    name = "income_household"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc,
                         name=name)
        if not output.equals(pd.DataFrame()):
            return output
    try:
        raw = pd.read_excel(urls[name]["dl"]["main"], sheet_name="Mensual",
                            skiprows=5, index_col=0).dropna(how="all")
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["main"],
                             verify=certificate)
            raw = pd.read_excel(BytesIO(r.content),
                                sheet_name="Mensual",
                                skiprows=5, index_col=0).dropna(how="all")
        else:
            raise err
    raw.index = pd.to_datetime(raw.index)
    output = raw.loc[~pd.isna(raw.index)]
    output.index = output.index + MonthEnd(0)
    output.columns = ["Total país", "Montevideo", "Interior: total",
                      "Interior: localidades de más de 5 mil hab.",
                      "Interior: localidades pequeñas y rural"]

    missing = pd.read_excel(urls[name]["dl"]["missing"],
                            index_col=0, header=0).iloc[:, 10:13]
    missing.columns = output.columns[:3]
    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    output = pd.concat([output, missing], sort=False)
    output = output.apply(pd.to_numeric, errors="coerce")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Ingresos", currency="UYU",
                  inf_adj="No", unit="Pesos", seas_adj="NSA",
                  ts_type="Flujo", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
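
A minimal usage sketch, assuming the function is imported from this module
and that "data" is a hypothetical directory holding the CSV cache:

# Download the series, revise against the cached CSV and save it back.
df = income_household(update_loc="data", revise_rows="nodup",
                      save_loc="data")
print(df.tail())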
Example #3
def long_rates(update_loc: Union[str, PathLike, Engine, Connection,
                                 None] = None,
               revise_rows: Union[str, int] = "nodup",
               save_loc: Union[str, PathLike, Engine, Connection, None] = None,
               only_get: bool = False) -> pd.DataFrame:
    """Get 10-year government bonds interest rates.

    Countries/aggregates selected are the US, Germany, France, Italy, Spain,
    the United Kingdom, Japan and China.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either a path or path-like string pointing to a directory where the
        CSV for updating can be found, a SQLAlchemy connection or engine
        object, or ``None`` to skip updating.
    revise_rows : {'nodup', 'auto', int}
        Defines how data updates are processed. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. A string can be either ``auto``, which automatically determines
        the number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either a path or path-like string pointing to a directory where the
        CSV will be saved, a SQLAlchemy connection or engine object, or
        ``None`` to skip saving.
    only_get : bool, default False
        If True, don't download data; retrieve whatever is available from
        ``update_loc``.

    Returns
    -------
    Daily 10-year government bonds interest rates : pd.DataFrame

    """
    name = "global_long_rates"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    bonds = []
    load_dotenv(Path(get_project_root(), ".env"))
    fred_api_key = os.environ.get("FRED_API_KEY")
    r = requests.get(f"{urls[name]['dl']['fred']}DGS10&api_key="
                     f"{fred_api_key}&file_type=json")
    us = pd.DataFrame.from_records(r.json()["observations"])
    us = us[["date", "value"]].set_index("date")
    us.index = pd.to_datetime(us.index)
    us.columns = ["United States"]
    bonds.append(us.apply(pd.to_numeric, errors="coerce").dropna())

    for country, sid in zip([
            "Germany", "France", "Italy", "Spain", "United Kingdom", "Japan",
            "China"
    ], ["23693", "23778", "23738", "23806", "23673", "23901", "29227"]):
        end_date_dt = dt.datetime(2000, 1, 1)
        start_date_dt = dt.datetime(2000, 1, 1)
        aux = []
        while end_date_dt < dt.datetime.now():
            end_date_dt = start_date_dt + dt.timedelta(days=5000)
            params = {
                "curr_id": sid,
                "smlID": str(randint(1000000, 99999999)),
                "header": f"{country} 10-Year Bond Yield Historical Data",
                "st_date": start_date_dt.strftime("%m/%d/%Y"),
                "end_date": end_date_dt.strftime("%m/%d/%Y"),
                "interval_sec": "Daily",
                "sort_col": "date",
                "sort_ord": "DESC",
                "action": "historical_data"
            }
            r = requests.post(urls["global_long_rates"]["dl"]["main"],
                              headers=investing_headers,
                              data=params)
            aux.append(
                pd.read_html(r.content,
                             match="Price",
                             index_col=0,
                             parse_dates=True)[0])
            start_date_dt = end_date_dt + dt.timedelta(days=1)
        aux = pd.concat(aux, axis=0)[["Price"]].sort_index()
        aux.columns = [country]
        bonds.append(aux)

    output = bonds[0].join(bonds[1:], how="left")
    output = output.interpolate(method="linear", limit_area="inside")
    output.columns = [
        "Estados Unidos", "Alemania", "Francia", "Italia", "España",
        "Reino Unido", "Japón", "China"
    ]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Global",
                  currency="USD",
                  inf_adj="No",
                  seas_adj="NSA",
                  unit="Tasa",
                  ts_type="-",
                  cumperiods=1)
    metadata._modify_multiindex(
        output,
        levels=[3],
        new_arrays=[["USD", "EUR", "EUR", "EUR", "EUR", "GBP", "JPY", "CNY"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
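
A usage sketch, assuming a FRED_API_KEY entry exists in the project's .env
file (the function loads it itself via load_dotenv):

# Fetch daily 10-year yields for all eight countries, without a cache.
yields = long_rates()
# Column labels are Spanish; select the US series by its level-0 label.
print(yields["Estados Unidos"].dropna().tail())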
Example #4
def gdp(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get seasonally adjusted real quarterly GDP for select countries.

    Countries/aggregates are US, EU-27, Japan and China.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either a path or path-like string pointing to a directory where the
        CSV for updating can be found, a SQLAlchemy connection or engine
        object, or ``None`` to skip updating.
    revise_rows : {'nodup', 'auto', int}
        Defines how data updates are processed. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. A string can be either ``auto``, which automatically determines
        the number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either a path or path-like string pointing to a directory where the
        CSV will be saved, a SQLAlchemy connection or engine object, or
        ``None`` to skip saving.
    only_get : bool, default False
        If True, don't download data; retrieve whatever is available from
        ``update_loc``.

    Returns
    -------
    Quarterly real GDP in seasonally adjusted terms : pd.DataFrame

    """
    name = "global_gdp"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    chn_y = dt.datetime.now().year + 1
    chn_r = requests.get(f"{urls[name]['dl']['chn_oecd']}{chn_y}-Q4")
    chn_json = chn_r.json()
    chn_datasets = []
    for dataset, start in zip(["0", "1"], ["2011-03-31", "1993-03-31"]):
        raw = chn_json["dataSets"][0]["series"][f"0:0:{dataset}:0"][
            "observations"]
        values = [x[0] for x in raw.values()]
        df = pd.DataFrame(data=values,
                          index=pd.date_range(start=start,
                                              freq="Q-DEC",
                                              periods=len(values)),
                          columns=["China"])
        chn_datasets.append(df)
    chn_qoq = chn_datasets[0]
    chn_yoy = chn_datasets[1]
    chn_obs = pd.read_excel(urls["global_gdp"]["dl"]["chn_obs"],
                            index_col=0).dropna(how="all",
                                                axis=1).dropna(how="all",
                                                               axis=0)
    chn_obs = chn_obs.loc[(chn_obs.index > "2011-01-01")
                          & (chn_obs.index < "2016-01-01")]
    chn_yoy["volume"] = chn_obs
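    # Back-cast pre-2011 volume levels from the YoY series: each quarter is
    # the level four quarters ahead deflated by that quarter's YoY rate.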
    for row in reversed(range(len(chn_yoy.loc[chn_yoy.index < "2011-01-01"]))):
        if pd.isna(chn_yoy.iloc[row, 1]):
            chn_yoy.iloc[row, 1] = (chn_yoy.iloc[row + 4, 1] /
                                    (1 + chn_yoy.iloc[row + 4, 0] / 100))
    chn_yoy = chn_yoy[["volume"]].loc[chn_yoy.index < "2016-01-01"]
    metadata._set(chn_yoy)
    chn_sa = decompose(chn_yoy[["volume"]].loc[chn_yoy.index < "2016-01-01"],
                       component="seas",
                       method="x13")
    chn_sa = pd.concat([chn_sa, chn_qoq], axis=1)
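    # Chain the QoQ growth rates onto the seasonally adjusted level series.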
    for row in range(len(chn_sa)):
        if not pd.isna(chn_sa.iloc[row, 1]):
            chn_sa.iloc[row, 0] = (chn_sa.iloc[row - 1, 0] *
                                   (1 + chn_sa.iloc[row, 1] / 100))
    chn = chn_sa.iloc[:, [0]].div(10)

    gdps = []
    load_dotenv(Path(get_project_root(), ".env"))
    fred_api_key = os.environ.get("FRED_API_KEY")
    for series in ["GDPC1", "CLVMNACSCAB1GQEU272020", "JPNRGDPEXP"]:
        r = requests.get(f"{urls[name]['dl']['fred']}{series}&api_key="
                         f"{fred_api_key}&file_type=json")
        aux = pd.DataFrame.from_records(r.json()["observations"])
        aux = aux[["date", "value"]].set_index("date")
        aux.index = pd.to_datetime(aux.index)
        aux.index = aux.index.shift(3, freq="M") + MonthEnd(0)
        aux.columns = [series]
        aux = aux.apply(pd.to_numeric, errors="coerce")
        if series == "GDPC1":
            aux = aux.div(4)
        elif series == "CLVMNACSCAB1GQEU272020":
            aux = aux.div(1000)
        gdps.append(aux)
    gdps = pd.concat(gdps, axis=1)

    output = pd.concat([gdps, chn], axis=1)
    output.columns = ["Estados Unidos", "Unión Europea", "Japón", "China"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Global",
                  currency="USD",
                  inf_adj="Const.",
                  unit="Miles de millones",
                  seas_adj="SA",
                  ts_type="Flujo",
                  cumperiods=1)
    metadata._modify_multiindex(output,
                                levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
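
A usage sketch showing the SQLAlchemy path for update_loc/save_loc; the
SQLite URL is hypothetical and a FRED_API_KEY must be available in .env:

from sqlalchemy import create_engine

# Keep the cached series in a local SQLite database instead of CSVs.
eng = create_engine("sqlite:///econuy.db")
df = gdp(update_loc=eng, save_loc=eng, revise_rows="nodup")
print(df.tail())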
Example #5
def cpi_measures(update_loc: Union[str, PathLike, Engine, Connection,
                                   None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine, Connection,
                                 None] = None,
                 only_get: bool = False) -> pd.DataFrame:
    """
    Get core CPI, Winsorized CPI, tradable CPI, non-tradable CPI and residual
    CPI.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either a path or path-like string pointing to a directory where the
        CSV for updating can be found, a SQLAlchemy connection or engine
        object, or ``None`` to skip updating.
    revise_rows : {'nodup', 'auto', int}
        Defines how data updates are processed. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. A string can be either ``auto``, which automatically determines
        the number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either a path or path-like string pointing to a directory where the
        CSV will be saved, a SQLAlchemy connection or engine object, or
        ``None`` to skip saving.
    only_get : bool, default False
        If True, don't download data; retrieve whatever is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI measures : pd.DataFrame

    """
    name = "cpi_measures"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output
    try:
        xls_10_14 = pd.ExcelFile(urls[name]["dl"]["2010-14"])
        xls_15 = pd.ExcelFile(urls[name]["dl"]["2015-"])
        prod_97 = (pd.read_excel(
            urls[name]["dl"]["1997"], skiprows=5).dropna(how="any").set_index(
                "Rubros, Agrupaciones, Subrubros, Familias y Artículos").T)
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["2010-14"], verify=certificate)
            xls_10_14 = pd.ExcelFile(BytesIO(r.content))
            r = requests.get(urls[name]["dl"]["2015-"], verify=certificate)
            xls_15 = pd.ExcelFile(BytesIO(r.content))
            r = requests.get(urls[name]["dl"]["1997"], verify=certificate)
            prod_97 = (pd.read_excel(BytesIO(
                r.content), skiprows=5).dropna(how="any").set_index(
                    "Rubros, Agrupaciones, Subrubros, Familias y Artículos").T)
        else:
            raise err
    weights_97 = (pd.read_excel(urls[name]["dl"]["1997_weights"],
                                index_col=0).drop_duplicates(
                                    subset="Descripción", keep="first"))
    weights = pd.read_excel(xls_10_14,
                            sheet_name=xls_10_14.sheet_names[0],
                            usecols="A:C",
                            skiprows=13,
                            index_col=0).dropna(how="any")
    weights.columns = ["Item", "Weight"]
    weights_8 = weights.loc[weights.index.str.len() == 8]

    sheets = []
    for excel_file in [xls_10_14, xls_15]:
        for sheet in excel_file.sheet_names:
            raw = pd.read_excel(excel_file,
                                sheet_name=sheet,
                                usecols="D:IN",
                                skiprows=8).dropna(how="all")
            proc = raw.loc[:,
                           raw.columns.str.contains("Indice|Índice")].dropna(
                               how="all")
            sheets.append(proc.T)
    complete_10 = pd.concat(sheets)
    complete_10 = complete_10.iloc[:, 1:]
    complete_10.columns = [weights["Item"], weights.index]
    complete_10.index = pd.date_range(start="2010-12-31",
                                      periods=len(complete_10),
                                      freq="M")
    diff_8 = complete_10.loc[:,
                             complete_10.columns.get_level_values(
                                 level=1).str.len() == 8].pct_change()
    win = pd.DataFrame(winsorize(diff_8, limits=(0.05, 0.05), axis=1))
    win.index = diff_8.index
    win.columns = diff_8.columns.get_level_values(level=1)
    cpi_win = win.mul(weights_8.loc[:, "Weight"].T)
    cpi_win = cpi_win.sum(axis=1).add(1).cumprod().mul(100)

    weights_97["Weight"] = (weights_97["Rubro"].fillna(
        weights_97["Agrupación, subrubro, familia"]).fillna(
            weights_97["Artículo"]).drop(
                columns=["Rubro", "Agrupación, subrubro, familia", "Artículo"])
                            )
    prod_97 = prod_97.loc[:, list(cpi_details["1997_base"].keys())]
    prod_97.index = pd.date_range(start="1997-03-31",
                                  periods=len(prod_97),
                                  freq="M")
    weights_97 = (weights_97[weights_97["Descripción"].isin(
        cpi_details["1997_weights"])].set_index("Descripción").drop(
            columns=["Rubro", "Agrupación, subrubro, "
                     "familia", "Artículo"])).div(100)
    weights_97.index = prod_97.columns
    prod_10 = complete_10.loc[:, list(cpi_details["2010_base"].keys())]
    prod_10 = prod_10.loc[:, ~prod_10.columns.get_level_values(
        level=0).duplicated()]
    prod_10.columns = prod_10.columns.get_level_values(level=0)
    weights_10 = (weights.loc[weights["Item"].isin(
        list(cpi_details["2010_base"].keys()))].drop_duplicates(
            subset="Item", keep="first")).set_index("Item")
    items = []
    weights = []
    for item, weight, details in zip([prod_10, prod_97],
                                     [weights_10, weights_97],
                                     ["2010_base", "1997_base"]):
        for tradable in [True, False]:
            items.append(item.loc[:, [
                k for k, v in cpi_details[details].items()
                if v["Tradable"] is tradable
            ]])
            aux = weight.loc[[
                k for k, v in cpi_details[details].items()
                if v["Tradable"] is tradable
            ]]
            weights.append(aux.div(aux.sum()))
        for core in [True, False]:
            items.append(item.loc[:, [
                k for k, v in cpi_details[details].items() if v["Core"] is core
            ]])
            aux = weight.loc[[
                k for k, v in cpi_details[details].items() if v["Core"] is core
            ]]
            weights.append(aux.div(aux.sum()))

    intermediate = []
    for item, weight in zip(items, weights):
        intermediate.append(item.mul(weight.squeeze()).sum(1))

    output = []
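    # Splice each 1997-base series (used before 2011) with its 2010-base
    # counterpart and rebuild index levels from the chained monthly changes.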
    for x, y in zip(intermediate[:4], intermediate[4:]):
        aux = pd.concat([
            y.pct_change().loc[y.index < "2011-01-01"],
            x.pct_change().loc[x.index > "2011-01-01"]
        ])
        output.append(aux.fillna(0).add(1).cumprod().mul(100))

    cpi_re = cpi(update_loc=update_loc, save_loc=save_loc, only_get=True)
    cpi_re = cpi_re.loc[cpi_re.index >= "1997-03-31"]
    output = pd.concat([cpi_re] + output + [cpi_win], axis=1)
    output.columns = [
        "Índice de precios al consumo: total",
        "Índice de precios al consumo: transables",
        "Índice de precios al consumo: no transables",
        "Índice de precios al consumo: subyacente",
        "Índice de precios al consumo: residual",
        "Índice de precios al consumo: Winsorized 0.05"
    ]
    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output,
                  area="Precios y salarios",
                  currency="-",
                  inf_adj="No",
                  unit="2010-12=100",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)
    output = transform.rebase(output,
                              start_date="2010-12-01",
                              end_date="2010-12-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
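
A minimal usage sketch; the column label below matches the names assigned
near the end of the function:

# Build all CPI measures from scratch and inspect the core CPI series.
measures = cpi_measures()
print(measures["Índice de precios al consumo: subyacente"].tail())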
Example #6
def cpi(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get CPI data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either a path or path-like string pointing to a directory where the
        CSV for updating can be found, a SQLAlchemy connection or engine
        object, or ``None`` to skip updating.
    revise_rows : {'nodup', 'auto', int}
        Defines how data updates are processed. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. A string can be either ``auto``, which automatically determines
        the number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either a path or path-like string pointing to a directory where the
        CSV will be saved, a SQLAlchemy connection or engine object, or
        ``None`` to skip saving.
    only_get : bool, default False
        If True, don't download data; retrieve whatever is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI index : pd.DataFrame

    """
    name = "cpi"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    try:
        cpi = pd.read_excel(urls[name]["dl"]["main"],
                            skiprows=7,
                            usecols="A:B",
                            index_col=0).dropna()
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["main"], verify=certificate)
            cpi = pd.read_excel(BytesIO(r.content),
                                skiprows=7,
                                usecols="A:B",
                                index_col=0).dropna()
        else:
            raise err
    cpi.columns = ["Índice de precios al consumo"]
    cpi.rename_axis(None, inplace=True)
    cpi.index = cpi.index + MonthEnd(1)

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        cpi = ops._revise(new_data=cpi,
                          prev_data=previous_data,
                          revise_rows=revise_rows)

    cpi = cpi.apply(pd.to_numeric, errors="coerce")
    metadata._set(cpi,
                  area="Precios",
                  currency="-",
                  inf_adj="No",
                  unit="2010-10=100",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=cpi, name=name)

    return cpi
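
A minimal usage sketch, with "data" as a hypothetical cache directory. Note
that with only_get=True the function still falls through to a download when
the cached frame comes back empty:

df = cpi(update_loc="data", save_loc="data", only_get=True)
print(df.tail())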
Example #7
def nxr_monthly(update_loc: Union[str, PathLike, Engine, Connection,
                                  None] = None,
                revise_rows: Union[str, int] = "nodup",
                save_loc: Union[str, PathLike, Engine, Connection,
                                None] = None,
                only_get: bool = False) -> pd.DataFrame:
    """Get monthly nominal exchange rate data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either a path or path-like string pointing to a directory where the
        CSV for updating can be found, a SQLAlchemy connection or engine
        object, or ``None`` to skip updating.
    revise_rows : {'nodup', 'auto', int}
        Defines how data updates are processed. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. A string can be either ``auto``, which automatically determines
        the number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either a path or path-like string pointing to a directory where the
        CSV will be saved, a SQLAlchemy connection or engine object, or
        ``None`` to skip saving.
    only_get : bool, default False
        If True, don't download data; retrieve whatever is available from
        ``update_loc``.

    Returns
    -------
    Monthly nominal exchange rates : pd.DataFrame
        Sell rate, monthly average and end of period.

    """
    name = "nxr_monthly"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output
    try:
        nxr_raw = pd.read_excel(urls[name]["dl"]["main"],
                                skiprows=4,
                                index_col=0,
                                usecols="A,C,F")
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["main"], verify=certificate)
            nxr_raw = pd.read_excel(BytesIO(r.content),
                                    skiprows=4,
                                    index_col=0,
                                    usecols="A,C,F")
        else:
            raise err
    nxr = nxr_raw.dropna(how="any", axis=0)
    nxr.columns = [
        "Tipo de cambio venta, fin de período",
        "Tipo de cambio venta, promedio"
    ]
    nxr.index = nxr.index + MonthEnd(1)
    nxr = nxr.apply(pd.to_numeric, errors="coerce")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        nxr = ops._revise(new_data=nxr,
                          prev_data=previous_data,
                          revise_rows=revise_rows)

    metadata._set(nxr,
                  area="Precios",
                  currency="UYU/USD",
                  inf_adj="No",
                  unit="-",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=nxr, name=name)

    return nxr
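
A minimal usage sketch:

# Monthly sell rates: end-of-period and period-average columns.
nxr = nxr_monthly()
print(nxr.tail(12))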
Example #8
File: labor.py  Project: rxavier/econuy
def rates_people(update_loc: Union[str, PathLike, Engine, Connection,
                                   None] = None,
                 save_loc: Union[str, PathLike, Engine, Connection,
                                 None] = None,
                 only_get: bool = True) -> pd.DataFrame:
    """
    Get labor data, both rates and persons. Extends national data between 1991
    and 2005 with data for jurisdictions with more than 5,000 inhabitants.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either a path or path-like string pointing to a directory where the
        CSV for updating can be found, a SQLAlchemy connection or engine
        object, or ``None`` to skip updating.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either a path or path-like string pointing to a directory where the
        CSV will be saved, a SQLAlchemy connection or engine object, or
        ``None`` to skip saving.
    only_get : bool, default True
        If True, don't download data; retrieve whatever is available from
        ``update_loc``.

    Returns
    -------
    Labor market data : pd.DataFrame

    """
    name = "labor_rates_people"

    rates = labor_rates(update_loc=update_loc, only_get=only_get)
    rates = rates.loc[:, [
        "Tasa de actividad: total", "Tasa de empleo: total",
        "Tasa de desempleo: total"
    ]]
    try:
        act_5000 = pd.read_excel(urls[name]["dl"]["act_5000"],
                                 sheet_name="Mensual",
                                 index_col=0,
                                 skiprows=8,
                                 usecols="A:B").dropna(how="any")
        emp_5000 = pd.read_excel(urls[name]["dl"]["emp_5000"],
                                 sheet_name="Mensual",
                                 index_col=0,
                                 skiprows=8,
                                 usecols="A:B").dropna(how="any")
        des_5000 = pd.read_excel(urls[name]["dl"]["des_5000"],
                                 sheet_name="Mensual",
                                 index_col=0,
                                 skiprows=7,
                                 usecols="A:B").dropna(how="any")
        working_age = pd.read_excel(urls[name]["dl"]["population"],
                                    skiprows=7,
                                    index_col=0,
                                    nrows=92).dropna(how="all")
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["act_5000"], verify=certificate)
            act_5000 = pd.read_excel(BytesIO(r.content),
                                     sheet_name="Mensual",
                                     index_col=0,
                                     skiprows=8,
                                     usecols="A:B").dropna(how="any")
            r = requests.get(urls[name]["dl"]["emp_5000"], verify=certificate)
            emp_5000 = pd.read_excel(BytesIO(r.content),
                                     sheet_name="Mensual",
                                     index_col=0,
                                     skiprows=8,
                                     usecols="A:B").dropna(how="any")
            r = requests.get(urls[name]["dl"]["des_5000"], verify=certificate)
            des_5000 = pd.read_excel(BytesIO(r.content),
                                     sheet_name="Mensual",
                                     index_col=0,
                                     skiprows=7,
                                     usecols="A:B").dropna(how="any")
            r = requests.get(urls[name]["dl"]["population"],
                             verify=certificate)
            working_age = pd.read_excel(BytesIO(r.content),
                                        skiprows=7,
                                        index_col=0,
                                        nrows=92).dropna(how="all")
        else:
            raise err
    for df in [act_5000, emp_5000, des_5000]:
        df.index = df.index + MonthEnd(0)
    rates_5000 = pd.concat([act_5000, emp_5000, des_5000], axis=1)
    rates_prev = rates_5000.loc[rates_5000.index < "2006-01-31"]
    rates_prev.columns = rates.columns
    rates = pd.concat([rates_prev, rates])
    rates.columns = rates.columns.set_levels(
        rates.columns.levels[0].str.replace(": total", ""), level=0)

    ages = list(range(14, 90)) + ["90 y más"]
    working_age = working_age.loc[ages].sum()
    working_age.index = pd.date_range(start="1996-06-30",
                                      end="2050-06-30",
                                      freq="A-JUN")
    monthly_working_age = working_age.resample("M").interpolate("linear")
    monthly_working_age = monthly_working_age.reindex(rates.index)
    persons = rates.iloc[:, [0, 1]].div(100).mul(monthly_working_age, axis=0)
    persons["Desempleados"] = rates.iloc[:, 2].div(100).mul(persons.iloc[:, 0])
    persons.columns = ["Activos", "Empleados", "Desempleados"]

    metadata._set(persons,
                  area="Mercado laboral",
                  currency="-",
                  inf_adj="No",
                  unit="Personas",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)

    output = pd.concat([rates, persons], axis=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
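
A minimal usage sketch, assuming labor rates were previously cached in the
hypothetical "data" directory (only_get defaults to True here, so the
underlying labor_rates call reads from update_loc):

df = rates_people(update_loc="data")
# Level-0 labels mix rates ("Tasa de actividad", ...) and person counts.
print(df[["Empleados", "Desempleados"]].tail())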
Example #9
File: labor.py  Project: rxavier/econuy
def hours(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
          revise_rows: Union[str, int] = "nodup",
          save_loc: Union[str, PathLike, Engine, Connection, None] = None,
          only_get: bool = False) -> pd.DataFrame:
    """Get average hours worked data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either a path or path-like string pointing to a directory where the
        CSV for updating can be found, a SQLAlchemy connection or engine
        object, or ``None`` to skip updating.
    revise_rows : {'nodup', 'auto', int}
        Defines how data updates are processed. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. A string can be either ``auto``, which automatically determines
        the number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either a path or path-like string pointing to a directory where the
        CSV will be saved, a SQLAlchemy connection or engine object, or
        ``None`` to skip saving.
    only_get : bool, default False
        If True, don't download data; retrieve whatever is available from
        ``update_loc``.

    Returns
    -------
    Monthly hours worked : pd.DataFrame

    """
    name = "hours_worked"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    try:
        raw = pd.read_excel(urls[name]["dl"]["main"],
                            sheet_name="Mensual",
                            skiprows=5,
                            index_col=0).dropna(how="all")
        prev_hours = pd.read_excel(urls[name]["dl"]["historical"],
                                   index_col=0,
                                   skiprows=8).dropna(how="all").iloc[:, [0]]
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["main"], verify=certificate)
            raw = pd.read_excel(BytesIO(r.content),
                                sheet_name="Mensual",
                                skiprows=5,
                                index_col=0).dropna(how="all")
            r = requests.get(urls[name]["dl"]["historical"],
                             verify=certificate)
            prev_hours = pd.read_excel(BytesIO(r.content),
                                       index_col=0,
                                       skiprows=8).dropna(how="all").iloc[:,
                                                                          [0]]
        else:
            raise err
    raw.index = pd.to_datetime(raw.index)
    output = raw.loc[~pd.isna(raw.index)]
    output.index = output.index + MonthEnd(0)
    output.columns = [
        "Total", "Industrias manufactureras",
        "Electricidad, gas, agua y saneamiento", "Construcción", "Comercio",
        "Transporte y almacenamiento", "Alojamiento y servicios de comidas",
        "Información y comunicación", "Actividades financieras",
        "Actividades inmobiliarias y administrativas",
        "Actividades profesionales",
        "Administración pública y seguridad social", "Enseñanza", "Salud",
        "Arte y otros servicios", "Act. de hogares como empleadores",
        "Agro, forestación, pesca y minería"
    ]

    prev_hours = prev_hours.loc[~prev_hours.index.str.contains("-|Total")]
    prev_hours.index = pd.date_range(start="2006-01-31",
                                     freq="M",
                                     periods=len(prev_hours))
    prev_hours = prev_hours.loc[prev_hours.index < "2011-01-01"]
    prev_hours.columns = ["Total"]
    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    output = pd.concat([prev_hours, output], sort=False)

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Mercado laboral",
                  currency="-",
                  inf_adj="No",
                  unit="Horas por semana",
                  seas_adj="NSA",
                  ts_type="Stock",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
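
A minimal usage sketch; "Total" is the only column available in the spliced
pre-2011 span:

hrs = hours()
print(hrs["Total"].tail())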
Example #10
File: labor.py  Project: rxavier/econuy
def labor_rates(update_loc: Union[str, PathLike, Engine, Connection,
                                  None] = None,
                revise_rows: Union[str, int] = "nodup",
                save_loc: Union[str, PathLike, Engine, Connection,
                                None] = None,
                only_get: bool = False) -> pd.DataFrame:
    """Get labor market data (LFPR, employment and unemployment).

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either a path or path-like string pointing to a directory where the
        CSV for updating can be found, a SQLAlchemy connection or engine
        object, or ``None`` to skip updating.
    revise_rows : {'nodup', 'auto', int}
        Defines how data updates are processed. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. A string can be either ``auto``, which automatically determines
        the number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either a path or path-like string pointing to a directory where the
        CSV will be saved, a SQLAlchemy connection or engine object, or
        ``None`` to skip saving.
    only_get : bool, default False
        If True, don't download data; retrieve whatever is available from
        ``update_loc``.

    Returns
    -------
    Monthly participation, employment and unemployment rates : pd.DataFrame

    """
    name = "labor_rates"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    try:
        labor_raw = pd.read_excel(urls[name]["dl"]["main"],
                                  skiprows=39).dropna(axis=0, thresh=2)
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["main"], verify=certificate)
            labor_raw = pd.read_excel(BytesIO(r.content),
                                      skiprows=39).dropna(axis=0, thresh=2)
        else:
            raise err
    labor = labor_raw[~labor_raw["Unnamed: 0"].str.
                      contains("-|/|Total", regex=True)]
    labor.index = pd.date_range(start="2006-01-01",
                                periods=len(labor),
                                freq="M")
    labor = labor.drop(columns="Unnamed: 0")
    labor.columns = [
        "Tasa de actividad: total", "Tasa de actividad: hombres",
        "Tasa de actividad: mujeres", "Tasa de empleo: total",
        "Tasa de empleo: hombres", "Tasa de empleo: mujeres",
        "Tasa de desempleo: total", "Tasa de desempleo: hombres",
        "Tasa de desempleo: mujeres"
    ]
    missing = pd.read_excel(urls[name]["dl"]["missing"], index_col=0,
                            header=0).iloc[:, :9]
    missing.columns = labor.columns
    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    labor = pd.concat([labor, missing])
    labor = labor.loc[~labor.index.duplicated(keep="first")]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        labor = ops._revise(new_data=labor,
                            prev_data=previous_data,
                            revise_rows=revise_rows)

    labor = labor.apply(pd.to_numeric, errors="coerce")
    metadata._set(labor,
                  area="Mercado laboral",
                  currency="-",
                  inf_adj="No",
                  unit="Tasa",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=labor, name=name)

    return labor
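
A minimal usage sketch:

# Participation, employment and unemployment rates by sex, monthly.
rates = labor_rates()
print(rates["Tasa de desempleo: total"].tail())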
Example #11
File: labor.py  Project: rxavier/econuy
def nominal_wages(update_loc: Union[str, PathLike, Engine, Connection,
                                    None] = None,
                  revise_rows: Union[str, int] = "nodup",
                  save_loc: Union[str, PathLike, Engine, Connection,
                                  None] = None,
                  only_get: bool = False) -> pd.DataFrame:
    """Get nominal general, public and private sector wages data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either a path or path-like string pointing to a directory where the
        CSV for updating can be found, a SQLAlchemy connection or engine
        object, or ``None`` to skip updating.
    revise_rows : {'nodup', 'auto', int}
        Defines how data updates are processed. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. A string can be either ``auto``, which automatically determines
        the number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either a path or path-like string pointing to a directory where the
        CSV will be saved, a SQLAlchemy connection or engine object, or
        ``None`` to skip saving.
    only_get : bool, default False
        If True, don't download data; retrieve whatever is available from
        ``update_loc``.

    Returns
    -------
    Monthly wages separated by public and private sector : pd.DataFrame

    """
    name = "nominal_wages"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output
    try:
        historical = pd.read_excel(urls[name]["dl"]["historical"],
                                   skiprows=8,
                                   usecols="A:B")
        current = pd.read_excel(urls[name]["dl"]["current"],
                                skiprows=8,
                                usecols="A,C:D")
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["historical"],
                             verify=certificate)
            historical = pd.read_excel(BytesIO(r.content),
                                       skiprows=8,
                                       usecols="A:B")
            r = requests.get(urls[name]["dl"]["current"], verify=certificate)
            current = pd.read_excel(BytesIO(r.content),
                                    skiprows=8,
                                    usecols="A,C:D")
        else:
            raise err

    historical = historical.dropna(how="any").set_index("Unnamed: 0")
    current = current.dropna(how="any").set_index("Unnamed: 0")
    wages = pd.concat([historical, current], axis=1)
    wages.index = wages.index + MonthEnd(1)
    wages.columns = [
        "Índice medio de salarios", "Índice medio de salarios privados",
        "Índice medio de salarios públicos"
    ]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        wages = ops._revise(new_data=wages,
                            prev_data=previous_data,
                            revise_rows=revise_rows)

    wages = wages.apply(pd.to_numeric, errors="coerce")
    metadata._set(wages,
                  area="Mercado laboral",
                  currency="UYU",
                  inf_adj="No",
                  unit="2008-07=100",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=wages, name=name)

    return wages
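
A minimal usage sketch, rebasing the wage indexes with the project's
transform.rebase helper (used the same way in Example #5):

wages = nominal_wages()
# Illustrative rebase so the indexes average 100 over 2010.
rebased = transform.rebase(wages, start_date="2010-01-01",
                           end_date="2010-12-31")
print(rebased.tail())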