def test_save_to_csv(self, mock_df):
    """
    Check that ``helpers.save_to_csv`` forwards the dataframe to ``to_csv``
    with a path prefixed by today's date (YYYY-MM-DD) inside the target
    directory, using xz compression, UTF-8 encoding and no index column.
    """
    date_prefix = datetime.now().strftime('%Y-%m-%d')
    expected_path = '../data/{}-sample-data.xz'.format(date_prefix)

    helpers.save_to_csv(mock_df, '../data', 'sample-data')

    mock_df.to_csv.assert_called_with(
        expected_path,
        compression='xz',
        encoding='utf-8',
        index=False,
    )
def fetch_speeches(data_dir, range_start, range_end):
    """
    Fetch the speeches for a date range, pass them to ``save_to_csv`` under
    the "speeches" name and return the fetched data.

    :param data_dir: (str) directory in which the output file will be saved
    :param range_start: (str) date in the format dd/mm/yyyy
    :param range_end: (str) date in the format dd/mm/yyyy
    :return: the result of ``SpeechesDataset.fetch`` for the given range
    """
    dataset = SpeechesDataset().fetch(range_start, range_end)
    save_to_csv(dataset, data_dir, "speeches")
    return dataset
def fetch_speeches(data_dir, range_start, range_end):
    """
    Fetch the speeches for a date range, pass them to ``save_to_csv`` under
    the "speeches" name and return the fetched data.

    :param data_dir: (str) directory in which the output file will be saved
    :param range_start: (str) date in the format dd/mm/yyyy
    :param range_end: (str) date in the format dd/mm/yyyy
    :return: the result of ``SpeechesDataset.fetch`` for the given range
    """
    dataset = SpeechesDataset().fetch(range_start, range_end)
    save_to_csv(dataset, data_dir, "speeches")
    return dataset
def fetch_official_missions(data_dir, start_date, end_date):
    """
    Fetch the official missions for a date range, pass them to
    ``save_to_csv`` under the "official-missions" name and return the
    fetched data.

    :param data_dir: (str) directory in which the output file will be saved
    :param start_date: (datetime) first date of the range to be scraped
    :param end_date: (datetime) last date of the range to be scraped
    :return: the result of ``OfficialMissionsDataset.fetch`` for the range
    """
    missions = OfficialMissionsDataset().fetch(start_date, end_date)
    save_to_csv(missions, data_dir, "official-missions")
    return missions
def fetch_official_missions(data_dir, start_date, end_date):
    """
    Fetch the official missions for a date range, pass them to
    ``save_to_csv`` under the "official-missions" name and return the
    fetched data.

    :param data_dir: (str) directory in which the output file will be saved
    :param start_date: (datetime) first date of the range to be scraped
    :param end_date: (datetime) last date of the range to be scraped
    :return: the result of ``OfficialMissionsDataset.fetch`` for the range
    """
    missions = OfficialMissionsDataset().fetch(start_date, end_date)
    save_to_csv(missions, data_dir, "official-missions")
    return missions
def fetch_deputies(data_dir):
    """
    Fetch the deputies dataset, pass it to ``save_to_csv`` under the
    "deputies" name, print how many records are holders and substitutes,
    and return the fetched data.

    :param data_dir: (str) directory in which the output file will be saved
    :return: the result of ``DeputiesDataset.fetch``
    """
    deputies_df = DeputiesDataset().fetch()
    save_to_csv(deputies_df, data_dir, "deputies")

    # Row filters keyed on the ``condition`` column of the fetched data.
    is_holder = deputies_df.condition == 'Holder'
    is_substitute = deputies_df.condition == 'Substitute'

    print("Total deputies:", len(deputies_df))
    print("Holder deputies:", len(deputies_df[is_holder]))
    print("Substitute deputies:", len(deputies_df[is_substitute]))
    return deputies_df
def fetch_session_start_times(data_dir, pivot, session_dates):
    """
    Fetch the start times of the requested sessions, pass them to
    ``save_to_csv`` under the "session-start-times" name, print how many
    dates were requested versus found, and return the fetched data.

    :param data_dir: (str) directory in which the output file will be saved
    :param pivot: (int) congressperson document to use as a pivot for
        scraping the data
    :param session_dates: (list) datetime objects to fetch the start times
        for
    :return: the result of ``SessionStartTimesDataset.fetch``
    """
    start_times = SessionStartTimesDataset().fetch(pivot, session_dates)
    save_to_csv(start_times, data_dir, "session-start-times")

    print("Dates requested:", len(session_dates))

    # Parse the 'date' column and reduce it to distinct calendar days.
    timestamps = pd.to_datetime(
        start_times['date'], format="%Y-%m-%d %H:%M:%S")
    distinct_days = timestamps.dt.date.unique()
    print("Dates found:", len(distinct_days))
    return start_times
def fetch_session_start_times(data_dir, pivot, session_dates):
    """
    Fetch the start times of the requested sessions, pass them to
    ``save_to_csv`` under the "session-start-times" name, print how many
    dates were requested versus found, and return the fetched data.

    :param data_dir: (str) directory in which the output file will be saved
    :param pivot: (int) congressperson document to use as a pivot for
        scraping the data
    :param session_dates: (list) datetime objects to fetch the start times
        for
    :return: the result of ``SessionStartTimesDataset.fetch``
    """
    start_times = SessionStartTimesDataset().fetch(pivot, session_dates)
    save_to_csv(start_times, data_dir, "session-start-times")

    print("Dates requested:", len(session_dates))

    # Parse the 'date' column and reduce it to distinct calendar days.
    timestamps = pd.to_datetime(
        start_times['date'], format="%Y-%m-%d %H:%M:%S")
    distinct_days = timestamps.dt.date.unique()
    print("Dates found:", len(distinct_days))
    return start_times
def fetch_presences(data_dir, deputies, date_start, date_end):
    """
    Fetch the presence records of the given deputies for a date range, pass
    them to ``save_to_csv`` under the "presences" name, print the number of
    present and absent records, and return the fetched data.

    :param data_dir: (str) directory in which the output file will be saved
    :param deputies: (pandas.DataFrame) a dataframe with deputies data
    :param date_start: (str) a date in the format dd/mm/yyyy
    :param date_end: (str) a date in the format dd/mm/yyyy
    :return: the result of ``PresencesDataset.fetch``
    """
    records = PresencesDataset().fetch(deputies, date_start, date_end)
    save_to_csv(records, data_dir, "presences")

    print("Presence records:", len(records))

    # Split the records by the value of their ``presence`` column.
    present = records[records.presence == 'Present']
    print("Records of deputies present on a session:", len(present))

    absent = records[records.presence == 'Absent']
    print("Records of deputies absent from a session:", len(absent))
    return records