def get_df_from_mongodb(name, collection):
    """
    Fetches the dataframe named `name` from the given mongodb collection and
    reindexes it to the yearly dates 2005-01-01 .. 2018-01-01.

    :param name: the df name (the value of the 'index' field in mongodb)
    :param collection: the mongodb collection to query
    :return: the df, indexed by date; years missing from the source appear as NaN rows
    """
    dates = [f'{x}-01-01' for x in range(2005, 2019)]
    df = Database.from_mongodb_df(collection, {'index': name}).reset_index(drop=True)\
                    .set_index('date', drop=True).reindex(dates)

    return df
def get_df(name, dates_updated):
    """
    Gets the dataframe from the mongodb, and conducts a pre-prediction if needed (if there are any nan values in the
    last rows of the df), after conducting interpolation to the rows in the middle and in the beginning.

    :param name: the df name
    :param dates_updated: Boolean. If true, then the function is called for chart update, and not initial load. This is
    useful in order to use the existing dates in the table, or map the df to years (2005, 2018).
    :return: the df
    """

    if not dates_updated:
        # initial load: pin the df to the yearly dates 2005..2018
        dates = [f'{x}-01-01' for x in range(2005, 2019)]
        df = Database.from_mongodb_df('dataframes', {'index': name}).reset_index(drop=True)\
            .set_index('date', drop=False).reindex(dates)
        df['date'] = df.index
    else:
        # chart update: keep whatever dates are already stored
        df = Database.from_mongodb_df('dataframes', {'index': name})
    flag = False
    temp_col = None

    # temporarily strip the 'date' column so interpolation/prediction
    # only operate on numeric indicator columns
    if 'date' in df.columns:
        flag = True
        temp_col = df['date']
        df = df.drop(columns='date')

    # per-column count of missing values
    nans = len(df) - df.count()
    if nans.any():
        # fill the gaps in the start & in the middle
        df = df.interpolate(method='linear',
                            axis=0,
                            limit_direction='backward')
        # drop the columns with all their values equal to NaN
        df = df.dropna(how='all', axis=1)
        # fills the gaps in the last rows of df, until year 2018
        for col in df.columns:
            if col != 'date':
                predictable_nans = df[col].isna().sum()
                if predictable_nans >= 1:
                    df = pre_predict(df, predictable_nans, col)
    if flag:
        # restore the 'date' column in front of the processed columns
        df_date = pd.DataFrame(temp_col)
        df = pd.concat([df_date, df], axis=1)

    return df
def transpose_df(indicators, dates, country_name):
    """
    Gets all the indicator dfs and from them keeps the columns of the country_name provided, merging them in a new df,
    and keeping those which don't have more than 60% of nan values.

    :param indicators: the list with the indicators, got from mongodb
    :param dates: the dates list, with the dates/rows to keep
    :param country_name: the name of the country/df which will be constructed
    :return: the country df
    """
    country_name = country_name.replace('_', ' ')
    # check if the country_name provided exists in indicator_df columns and get its index
    # (the last match wins; if none matches, fall back to the first indicator)
    idx = 0
    base_df = None
    for i, indicator in enumerate(indicators):
        candidate = Database.from_mongodb_df('dataframes', {'index': indicator})
        if country_name in candidate:
            idx = i
            base_df = candidate  # cache it so we don't fetch the same df again below
    if base_df is None:
        base_df = Database.from_mongodb_df('dataframes', {'index': indicators[idx]})

    master_df = pd.DataFrame(base_df
                             .reset_index(drop=True).set_index('date', drop=True).reindex(dates)[country_name]
                             .rename(indicators[idx])).astype(float)
    for indicator in indicators:
        if indicator != indicators[idx]:
            # get from mongodb, set index the dates & keep only the ones between (2005,2018) or until the year predicted
            indicator_df = Database.from_mongodb_df('dataframes', {'index': indicator}).reset_index(drop=True)\
                .set_index('date', drop=True).reindex(dates)

            if country_name in indicator_df.columns:
                indicator_df = indicator_df[country_name]
                # rename the series with the indicator's name, so that it will be its column name
                indicator_df = pd.DataFrame(indicator_df.rename(indicator)).astype(float)

                # merge columns to a single dataframe (joined on the shared 'date' index level)
                master_df = master_df.merge(indicator_df, on='date')
    # drop the columns with more than 60% nan values (blanks and zeros count as missing too)
    master_df = master_df.loc[:, master_df.isin([' ', np.nan, 0]).mean() < .6]

    return master_df