# Make sure the output and observation database folders exist, creating
# whichever one is missing (output first, then observations).
for _db_dir in (OUTPUT_DATABASE_PATH, OBS_DATABASE_PATH):
    if not os.path.isdir(_db_dir):
        make_folders(_db_dir)

# %% [markdown]
# ## Protocol

# %%
# Location of the annual-mean emissions protocol CSV below INPUT_DATA_DIR.
SCENARIO_PROTOCOL = os.path.join(
    INPUT_DATA_DIR,
    "data",
    "protocol",
    "rcmip-emissions-annual-means.csv",
)

# %%
# Load the protocol file at SCENARIO_PROTOCOL into an ScmDataFrame and
# preview it with head().
protocol_db = ScmDataFrame(SCENARIO_PROTOCOL)
protocol_db.head()

# %%
# Distinct values of the "scenario" column in the protocol data.
protocol_db["scenario"].unique()

# %%
# Location of the data submission template workbook below INPUT_DATA_DIR;
# its "variable_definitions" sheet is read further down.
DATA_PROTOCOL = os.path.join(
    INPUT_DATA_DIR,
    "data",
    "submission-template",
    "rcmip-data-submission-template.xlsx",
)

# %%
# "variable_definitions" sheet of the data submission template workbook.
protocol_variables = pd.read_excel(DATA_PROTOCOL,
                                   sheet_name="variable_definitions")

# Read every submission file and keep only the requested variables
# (variables_erf) and scenarios (scenarios_fl).
filtered_runs = []
for rf in tqdm.tqdm_notebook(relevant_files):
    # CSV files are read as-is; any other file is read from its "your_data"
    # sheet (presumably an Excel submission workbook -- confirm).
    read_kwargs = {} if rf.endswith(".csv") else {"sheet_name": "your_data"}
    loaded = ScmDataFrame(rf, **read_kwargs)
    filtered_runs.append(
        loaded.filter(variable=variables_erf, scenario=scenarios_fl))

# Combine the per-file results and flatten them via
# timeseries().reset_index() so the unit column can be edited directly
# before the ScmDataFrame is rebuilt.
db = df_append(filtered_runs).timeseries().reset_index()
# Unify the spelling of dimensionless units; non-string entries are left
# untouched.
db["unit"] = db["unit"].apply(
    lambda unit: unit.replace("Dimensionless", "dimensionless")
    if isinstance(unit, str) else unit)
# Clear whatever the cell has emitted so far (e.g. the progress bar) so that
# only the preview below remains.
clear_output()
db = ScmDataFrame(db)
db.head()

# %% jupyter={"outputs_hidden": false} pycharm={"name": "#%%\n"}
# Distinct values of the column named by `variable`.
# NOTE(review): `variable` (like `climatemodel` and `scenario` in the cells
# below) is a bare name rather than a string literal as in db['unit'] --
# presumably a constant holding the column name, defined outside this
# section; confirm.
db[variable].unique()

# %%
# Distinct values of the column named by `climatemodel`.
db[climatemodel].unique()

# %%
# Distinct values of the column named by `scenario`.
db[scenario].unique()

# %%
# Distinct unit strings, after the "Dimensionless" -> "dimensionless"
# normalisation applied when `db` was built.
db['unit'].unique()

# %%
for cm in db[climatemodel].unique():