Example #1
0
    def test_save_to_csv(self, mock_df):
        """
        ``helpers.save_to_csv`` must call ``to_csv`` on the dataframe with a
        path built from the directory, today's date and the dataset name,
        plus the xz / utf-8 / no-index options.
        """
        # Computed before the call, in the same order as the code under test
        # is expected to compute its own date prefix.
        date_prefix = datetime.now().strftime('%Y-%m-%d')
        helpers.save_to_csv(mock_df, '../data', 'sample-data')

        expected_path = '../data/{}-sample-data.xz'.format(date_prefix)
        expected_options = {
            'compression': 'xz',
            'encoding': 'utf-8',
            'index': False,
        }
        mock_df.to_csv.assert_called_with(expected_path, **expected_options)
Example #2
0
def fetch_speeches(data_dir, range_start, range_end):
    """
    Fetch the speeches of a date range, persist them through ``save_to_csv``
    under the name "speeches" and hand the fetched data back to the caller.

    :param data_dir: (str) directory in which the output file will be saved
    :param range_start: (str) date in the format dd/mm/yyyy
    :param range_end: (str) date in the format dd/mm/yyyy
    :return: whatever ``SpeechesDataset.fetch`` produced (presumably a
        pandas.DataFrame -- confirm against the dataset class)
    """
    speeches_df = SpeechesDataset().fetch(range_start, range_end)
    save_to_csv(speeches_df, data_dir, "speeches")
    return speeches_df
def fetch_speeches(data_dir, range_start, range_end):
    """
    Fetch the speeches of a date range, persist them through ``save_to_csv``
    under the name "speeches" and hand the fetched data back to the caller.

    :param data_dir: (str) directory in which the output file will be saved
    :param range_start: (str) date in the format dd/mm/yyyy
    :param range_end: (str) date in the format dd/mm/yyyy
    :return: whatever ``SpeechesDataset.fetch`` produced (presumably a
        pandas.DataFrame -- confirm against the dataset class)
    """
    speeches_df = SpeechesDataset().fetch(range_start, range_end)
    save_to_csv(speeches_df, data_dir, "speeches")
    return speeches_df
def fetch_official_missions(data_dir, start_date, end_date):
    """
    Fetch the official missions of a date range, persist them through
    ``save_to_csv`` under the name "official-missions" and return them.

    :param data_dir: (str) directory in which the output file will be saved
    :param start_date: (datetime) first date of the range to be scraped
    :param end_date: (datetime) last date of the range to be scraped
    :return: whatever ``OfficialMissionsDataset.fetch`` produced (presumably
        a pandas.DataFrame -- confirm against the dataset class)
    """
    missions_df = OfficialMissionsDataset().fetch(start_date, end_date)
    save_to_csv(missions_df, data_dir, "official-missions")
    return missions_df
def fetch_official_missions(data_dir, start_date, end_date):
    """
    Fetch the official missions of a date range, persist them through
    ``save_to_csv`` under the name "official-missions" and return them.

    :param data_dir: (str) directory in which the output file will be saved
    :param start_date: (datetime) first date of the range to be scraped
    :param end_date: (datetime) last date of the range to be scraped
    :return: whatever ``OfficialMissionsDataset.fetch`` produced (presumably
        a pandas.DataFrame -- confirm against the dataset class)
    """
    missions_df = OfficialMissionsDataset().fetch(start_date, end_date)
    save_to_csv(missions_df, data_dir, "official-missions")
    return missions_df
def fetch_deputies(data_dir):
    """
    Fetch the deputies dataset, persist it through ``save_to_csv`` under the
    name "deputies", print a short summary of the counts and return the data.

    :param data_dir: (str) directory in which the output file will be saved
    :return: whatever ``DeputiesDataset.fetch`` produced; it is indexed with
        boolean masks here, so presumably a pandas.DataFrame
    """
    deputies_df = DeputiesDataset().fetch()
    save_to_csv(deputies_df, data_dir, "deputies")

    # Both masks are built before anything is printed, so a missing
    # ``condition`` column fails before any summary line is emitted.
    is_holder = deputies_df.condition == 'Holder'
    is_substitute = deputies_df.condition == 'Substitute'

    print("Total deputies:", len(deputies_df))
    print("Holder deputies:", len(deputies_df[is_holder]))
    print("Substitute deputies:", len(deputies_df[is_substitute]))
    return deputies_df
def fetch_session_start_times(data_dir, pivot, session_dates):
    """
    Fetch the session start times for the given dates, persist them through
    ``save_to_csv`` under the name "session-start-times", print how many
    dates were requested versus found, and return the fetched data.

    :param data_dir: (str) directory in which the output file will be saved
    :param pivot: (int) congressperson document to use as a pivot for scraping the data
    :param session_dates: (list) datetime objects to fetch the start times for
    :return: whatever ``SessionStartTimesDataset.fetch`` produced; its
        ``date`` column is parsed here as "%Y-%m-%d %H:%M:%S"
    """
    start_times_df = SessionStartTimesDataset().fetch(pivot, session_dates)
    save_to_csv(start_times_df, data_dir, "session-start-times")

    print("Dates requested:", len(session_dates))
    # Reduce the timestamps to calendar days so each day is counted once.
    timestamps = pd.to_datetime(start_times_df['date'],
                                format="%Y-%m-%d %H:%M:%S")
    distinct_days = timestamps.dt.date.unique()
    print("Dates found:", len(distinct_days))
    return start_times_df
Example #8
0
def fetch_session_start_times(data_dir, pivot, session_dates):
    """
    Fetch the session start times for the given dates, persist them through
    ``save_to_csv`` under the name "session-start-times", print how many
    dates were requested versus found, and return the fetched data.

    :param data_dir: (str) directory in which the output file will be saved
    :param pivot: (int) congressperson document to use as a pivot for scraping the data
    :param session_dates: (list) datetime objects to fetch the start times for
    :return: whatever ``SessionStartTimesDataset.fetch`` produced; its
        ``date`` column is parsed here as "%Y-%m-%d %H:%M:%S"
    """
    start_times_df = SessionStartTimesDataset().fetch(pivot, session_dates)
    save_to_csv(start_times_df, data_dir, "session-start-times")

    print("Dates requested:", len(session_dates))
    # Reduce the timestamps to calendar days so each day is counted once.
    timestamps = pd.to_datetime(start_times_df['date'],
                                format="%Y-%m-%d %H:%M:%S")
    distinct_days = timestamps.dt.date.unique()
    print("Dates found:", len(distinct_days))
    return start_times_df
def fetch_presences(data_dir, deputies, date_start, date_end):
    """
    Fetch the presence records of the given deputies in a date range,
    persist them through ``save_to_csv`` under the name "presences", print
    a present/absent summary and return the fetched data.

    :param data_dir: (str) directory in which the output file will be saved
    :param deputies: (pandas.DataFrame) a dataframe with deputies data
    :param date_start: (str) a date in the format dd/mm/yyyy
    :param date_end: (str) a date in the format dd/mm/yyyy
    :return: whatever ``PresencesDataset.fetch`` produced; it is filtered by
        its ``presence`` column here, so presumably a pandas.DataFrame
    """
    presence_df = PresencesDataset().fetch(deputies, date_start, date_end)
    save_to_csv(presence_df, data_dir, "presences")

    print("Presence records:", len(presence_df))

    # Each subset is built right before its own print so the order of
    # evaluation and output stays the same.
    present_rows = presence_df[presence_df.presence == 'Present']
    print("Records of deputies present on a session:", len(present_rows))

    absent_rows = presence_df[presence_df.presence == 'Absent']
    print("Records of deputies absent from a session:", len(absent_rows))

    return presence_df