Example #1
0
from boo import download, read_dataframe

# Fetch the data for one year, load it as a dataframe and preview
# the first rows.
year = 2012
download(year)
df = read_dataframe(year)
print(df.head())
Example #2
0
"""Download and truncate Rosstat corporate dataset."""

from boo import download, build, read_dataframe, files

# Heads-up for the user: the steps below may run for a while.
print(
    "Please be prepared: download and build operations can take long time!"
)

year = 2012

# Steps are attempted in order. A FileExistsError from a step is reported
# with the matching message and the script moves on to the next step:
#   1. download - fetch the raw file from Rosstat
#   2. build    - select fewer columns, assign short column names and
#                 save the result to a new file
pipeline = (
    (download, "Raw file already downloaded"),
    (build, "Work file already created"),
)
for step, already_done in pipeline:
    try:
        step(year)
    except FileExistsError:
        print(already_done)

# Read the data as a dataframe and report its dimensions
df = read_dataframe(year)
n_rows, n_cols = df.shape[0], df.shape[1]
print(year, "dataset:", n_rows, "rows and", n_cols, "columns")
print("File locations:", files(year))
Example #3
0
    # convert from thousand to billion rub and round to 3 digits
    cols = numeric_columns(df)
    zf = df.copy()
    zf.loc[:, cols] = zf.loc[:, cols].divide(divide_by).round(digits)
    return zf


# save as CSV and Excel
def locate(filename):
    """Return the path of *filename* placed inside the ``assets`` folder.

    The result is a relative path built with ``os.path.join``; nothing is
    created or checked on disk.
    """
    folder = "assets"
    return os.path.join(folder, filename)


if __name__ == "__main__":
    must_overwrite = True

    boo.download(2018)
    source_df = boo.read_dataframe(2018)
    print("Finished reading file, querying...")

    # Has some profit or loss, but not exactly zero thousand RUB,
    # (protects from ghost firms)
    ix = source_df.profit_before_tax != 0

    # Not a financial firm
    ix = ix & (~source_df.ok1.isin([64, 65]))

    # Gazprom will be on top of list
    df = source_df[ix].sort_values("ta", ascending=False).dropna()

    if must_overwrite:
        print("Saving files...")