def main(argv):
    global g

    parser = argparse.ArgumentParser()
    parser.add_argument('--extract-dataset', required=False, help='Name of dataset in the input JSON file to ' \
        'extract into output CSV file. NOTE: Output file will be timestamped derivative of input JSON file and ' \
        'dataset name.')
    parser.add_argument('--list-datasets', action='store_true', help='If specified, overrides all other flags and ' \
        'opens input JSON file and dumps list of datasets found in the file.')
    parser.add_argument('--investment-json-filename', required=False, type=argparse.FileType('r'),
        help='Name of input JSON file containing investment data retrieved using get_investment_data.py')
    parser.add_argument('--message-output-filename', required=False, help='Filename of message output file. If ' +
        'unspecified, defaults to stderr')
    g.args = parser.parse_args()

    message_level = 'Info'
    util.set_logger(message_level, g.args.message_output_filename, os.path.basename(__file__))

    if not (g.args.list_datasets and g.args.investment_json_filename is not None) and \
            (g.args.investment_json_filename is None or g.args.extract_dataset is None):
        print('NOTE: Must specify either (--investment-json-filename and --list-datasets) or ' \
            '(--investment-json-filename and --extract-dataset)')
        parser.print_help()
        util.sys_exit(0)

    if g.args.list_datasets:
        ListDatasets(g.args.investment_json_filename)
    else:
        ExtractDataset2CsvFile(g.args.investment_json_filename, g.args.extract_dataset)

    util.sys_exit(0)
def get_schedules_from_ini():
    config_file_path = os.path.dirname(os.path.abspath(__file__)) + "/ccb_backup.ini"
    config_parser = ConfigParser.ConfigParser()
    config_parser.read(config_file_path)
    schedules = []
    curr_datetime = datetime.datetime.now(pytz.UTC)
    message_info("Current UTC datetime: " + str(curr_datetime))
    for schedule in config_parser.items("schedules"):
        schedule_parms = schedule[1].split(",")
        if len(schedule_parms) != 3:
            message_error(
                "ccb_backup.ini [schedules] entry '" + schedule[0] + "=" + schedule[1] + "' is invalid. "
                "Must contain 3 comma-separated fields. Aborting!"
            )
            util.sys_exit(1)
        folder_name = schedule_parms[0].strip()
        delta_time_string = schedule_parms[1].strip()
        num_files_to_keep_string = schedule_parms[2].strip()
        try:
            num_files_to_keep = int(num_files_to_keep_string)
        except ValueError:
            message_error(
                "ccb_backup.ini [schedules] entry '" + schedule[0] + "=" + schedule[1] + "' is "
                "invalid. '" + num_files_to_keep_string + "' must be a positive integer"
            )
            util.sys_exit(1)
        if num_files_to_keep < 0:
            message_error(
                "ccb_backup.ini [schedules] entry '" + schedule[0] + "=" + schedule[1] + "' is "
                "invalid. Specified a negative number of files to keep"
            )
            util.sys_exit(1)
        backup_after_datetime = now_minus_delta_time(delta_time_string)
        if backup_after_datetime is None:
            message_error(
                "ccb_backup.ini [schedules] entry '" + schedule[0] + "=" + schedule[1] + "' contains "
                "an invalid interval between backups '" + delta_time_string + "'. Aborting!"
            )
            util.sys_exit(1)
        schedules.append(
            {
                "folder_name": folder_name,
                "backup_after_datetime": backup_after_datetime,
                "num_files_to_keep": num_files_to_keep,
            }
        )
    return schedules
def get_schedules_from_ini():
    config_file_path = os.path.dirname(os.path.abspath(__file__)) + '/web_backup.ini'
    config_parser = ConfigParser.ConfigParser()
    config_parser.read(config_file_path)
    schedules = []
    curr_datetime = datetime.datetime.now(pytz.UTC)
    message_info('Current UTC datetime: ' + str(curr_datetime))
    for schedule in config_parser.items('schedules'):
        schedule_parms = schedule[1].split(',')
        if len(schedule_parms) != 3:
            message_error("web_backup.ini [schedules] entry '" + schedule[0] + '=' + schedule[1] + "' is invalid. " \
                "Must contain 3 comma-separated fields. Aborting!")
            util.sys_exit(1)
        folder_name = schedule_parms[0].strip()
        delta_time_string = schedule_parms[1].strip()
        num_files_to_keep_string = schedule_parms[2].strip()
        try:
            num_files_to_keep = int(num_files_to_keep_string)
        except ValueError:
            message_error("web_backup.ini [schedules] entry '" + schedule[0] + '=' + schedule[1] + "' is " \
                "invalid. '" + num_files_to_keep_string + "' must be a positive integer")
            util.sys_exit(1)
        if num_files_to_keep < 0:
            message_error("web_backup.ini [schedules] entry '" + schedule[0] + '=' + schedule[1] + "' is " \
                "invalid. Specified a negative number of files to keep")
            util.sys_exit(1)
        backup_after_datetime = now_minus_delta_time(delta_time_string)
        if backup_after_datetime is None:
            message_error("web_backup.ini [schedules] entry '" + schedule[0] + '=' + schedule[1] + "' contains " \
                "an invalid interval between backups '" + delta_time_string + "'. Aborting!")
            util.sys_exit(1)
        schedules.append({
            'folder_name': folder_name,
            'backup_after_datetime': backup_after_datetime,
            'num_files_to_keep': num_files_to_keep
        })
    return schedules
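# For reference, both get_schedules_from_ini() variants above parse a [schedules] section whose entries each
# hold "folder_name, interval_between_backups, num_files_to_keep". The snippet below is an illustrative sketch
# only: the folder names match the daily/weekly/monthly expiry table used later in this code, but the interval
# syntax is an assumption and must be whatever now_minus_delta_time() actually accepts.
#
#   [schedules]
#   daily = daily, 20h, 7
#   weekly = weekly, 6d, 5
#   monthly = monthly, 27d, 12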
def main(argv):
    global g

    parser = argparse.ArgumentParser()
    parser.add_argument('--output-filename', required=False,
        help='Output CSV filename. Defaults to ./tmp/pledges_[datetime_stamp].csv')
    parser.add_argument('--message-output-filename', required=False, help='Filename of message output file. If ' +
        'unspecified, defaults to stderr')
    g.args = parser.parse_args()

    message_level = util.get_ini_setting('logging', 'level')
    util.set_logger(message_level, g.args.message_output_filename, os.path.basename(__file__))

    ccb_app_username = util.get_ini_setting('ccb', 'app_username', False)
    ccb_app_password = util.get_ini_setting('ccb', 'app_password', False)
    ccb_subdomain = util.get_ini_setting('ccb', 'subdomain', False)

    curr_date_str = datetime.datetime.now().strftime('%m/%d/%Y')

    pledge_summary_report_info = {
        "id": "",
        "type": "pledge_giving_summary",
        "pledge_type": "family",
        "date_range": "",
        "ignore_static_range": "static",
        "start_date": "01/01/1990",
        "end_date": curr_date_str,
        "campus_ids": ["1"],
        "output": "csv"
    }

    pledge_summary_request = {
        'request': json.dumps(pledge_summary_report_info),
        'output': 'export'
    }

    pledge_detail_dialog_report_info = {
        "type": "pledge_giving_detail",
        "id": ""
    }

    pledge_detail_dialog_request = {
        'aj': 1,
        'ax': 'create_modal',
        'request': json.dumps(pledge_detail_dialog_report_info),
    }

    pledge_detail_report_info = {
        'id': '',
        'type': 'pledge_giving_detail',
        'transaction_detail_type_id': '{coa_id}',  # {coa_id} is substituted at run-time
        'print_type': 'family',
        'split_child_records': '1',
        'show': 'all',
        'date_range': '',
        'ignore_static_range': 'static',
        'start_date': '01/01/1990',
        'end_date': curr_date_str,
        'campus_ids': ['1'],
        'output': 'csv'
    }

    pledge_detail_request = {
        'request': json.dumps(pledge_detail_report_info),  # This is also replaced at run-time
        'output': 'export'
    }

    with requests.Session() as http_session:
        util.login(http_session, ccb_subdomain, ccb_app_username, ccb_app_password)

        # Get list of pledged categories
        pledge_summary_response = http_session.post('https://' + ccb_subdomain + '.ccbchurch.com/report.php',
            data=pledge_summary_request)
        pledge_summary_succeeded = False
        if pledge_summary_response.status_code == 200:
            match_pledge_summary_info = re.search('COA Category', pledge_summary_response.text)
            if match_pledge_summary_info is not None:
                pledge_summary_succeeded = True
        if not pledge_summary_succeeded:
            logging.error('Pledge Summary retrieval failure. Aborting!')
            util.sys_exit(1)
        csv_reader = csv.reader(StringIO.StringIO(pledge_summary_response.text.encode('ascii', 'ignore')))
        header_row = True
        list_pledge_categories = []
        for row in csv_reader:
            if header_row:
                assert row[0] == 'COA Category'
                header_row = False
            else:
                list_pledge_categories.append(unicode(row[0]))

        # Get dictionary of category option IDs
        report_page = http_session.get('https://' + ccb_subdomain + '.ccbchurch.com/service/report_settings.php',
            params=pledge_detail_dialog_request)
        if report_page.status_code == 200:
            match_report_options = re.search(
                '<select\s+name=\\\\"transaction_detail_type_id\\\\"\s+id=\\\\"\\\\"\s*>(.*?)<\\\/select>',
                report_page.text)
            pledge_categories_str = match_report_options.group(1)
        else:
            logging.error('Error retrieving report settings page. Aborting!')
            util.sys_exit(1)
        dict_pledge_categories = {}
        root_str = ''
        for option_match in re.finditer(r'<option\s+value=\\"([0-9]+)\\"\s*>([^<]*)<\\/option>',
                pledge_categories_str):
            if re.match(r'&nbsp;', option_match.group(2)):
                # Indented (child) option. Strip the leading 6-character '&nbsp;' prefix
                dict_pledge_categories[root_str + ' : ' + option_match.group(2)[6:]] = int(option_match.group(1))
            else:
                root_str = option_match.group(2)
                dict_pledge_categories[root_str] = int(option_match.group(1))

        # Loop over each category with pledges and pull back CSV list of pledges for that category
        output_csv_header = None
        if g.args.output_filename is not None:
            output_filename = g.args.output_filename
        else:
            output_filename = './tmp/pledges_' + datetime.datetime.now().strftime('%Y%m%d%H%M%S') + '.csv'
        util.test_write(output_filename)
        with open(output_filename, 'wb') as csv_output_file:
            csv_writer = csv.writer(csv_output_file)
            for pledge_category in list_pledge_categories:
                logging.info('Retrieving pledges for ' + pledge_category)
                if pledge_category in dict_pledge_categories:
                    pledge_detail_report_info['transaction_detail_type_id'] = \
                        str(dict_pledge_categories[pledge_category])
                    pledge_detail_request['request'] = json.dumps(pledge_detail_report_info)
                    pledge_detail_response = http_session.post('https://' + ccb_subdomain +
                        '.ccbchurch.com/report.php', data=pledge_detail_request)
                    pledge_detail_succeeded = False
                    if pledge_detail_response.status_code == 200 and \
                            pledge_detail_response.text[:8] == 'Name(s),':
                        pledge_detail_succeeded = True
                        csv_reader = csv.reader(StringIO.StringIO(
                            pledge_detail_response.text.encode('ascii', 'ignore')))
                        header_row = True
                        for row in csv_reader:
                            if header_row:
                                header_row = False
                                if output_csv_header is None:
                                    output_csv_header = ['COA ID', 'COA Category'] + row
                                    amount_column_index = output_csv_header.index('Total Pledged')
                                    csv_writer.writerow(output_csv_header)
                            else:
                                row = [dict_pledge_categories[pledge_category], pledge_category] + row
                                if row[amount_column_index] != '0':  # Ignore non-pledge (contrib-only) rows
                                    csv_writer.writerow(row)
                    if not pledge_detail_succeeded:
                        logging.warning('Pledge Detail retrieval failure for category ' + pledge_category)
                else:
                    logging.warning('Unknown pledge category. ' + pledge_category)

    logging.info('Pledge details retrieved successfully and written to ' + output_filename)

    util.sys_exit(0)
def main(argv):
    global g

    parser = argparse.ArgumentParser()
    parser.add_argument('--output-filename', required=False,
        help='Output ZIP filename. Defaults to ./tmp/<website_name>_[datetime_stamp].zip')
    parser.add_argument('--message-output-filename', required=False, help='Filename of message output file. If ' \
        'unspecified, then messages are written to stderr as well as into the messages_[datetime_stamp].log file ' \
        'that is zipped into the resulting backup file.')
    parser.add_argument('--post-to-s3', action='store_true', help='If specified, then the created zip file is ' \
        'posted to Amazon AWS S3 bucket (using bucket URL and password in web_backup.ini file)')
    parser.add_argument('--delete-zip', action='store_true', help='If specified, then the created zip file is ' \
        'deleted after posting to S3')
    parser.add_argument('--update-and-secure-wp', action='store_true', help='If specified, then ' \
        '/root/bin/update_and_secure_wp utility is run to upgrade WordPress and plugins and redo security flags ' \
        'after backup is completed')
    parser.add_argument('--website-name', required=False, help='Specified website name is mapped to its ' \
        'hosting directory under /var/www and its contents are recursively zipped and if website is WordPress, ' \
        'wp-config.php is interrogated and a database .sql backup file is created and included in the encrypted ' \
        'zip archive which is posted to S3. If no --website-name is specified, then all websites on this server ' \
        'are listed')
    parser.add_argument('--retain-temp-directory', action='store_true', help='If specified, the temp directory ' +
        'with output from website directory and WordPress database is not deleted')
    parser.add_argument('--show-backups-to-do', action='store_true', help='If specified, the ONLY thing that is ' +
        'done is backup posts and deletions to S3 are calculated and displayed')
    parser.add_argument('--zip-file-password', required=False, help='If provided, overrides password used to ' \
        'encrypt zip file that is created that was specified in web_backup.ini')
    parser.add_argument('--aws-s3-bucket-name', required=False, help='AWS S3 bucket where output backup zip files ' \
        'are stored')
    parser.add_argument('--notification-emails', required=False, nargs='*', default=argparse.SUPPRESS,
        help='If specified, list of email addresses that are emailed upon successful upload to AWS S3, along with ' \
        'accessor link to get at the backup zip file (which is encrypted)')
    g.args = parser.parse_args()

    g.program_filename = os.path.basename(__file__)
    if g.program_filename[-3:] == '.py':
        g.program_filename = g.program_filename[:-3]

    message_level = util.get_ini_setting('logging', 'level')
    script_directory = os.path.dirname(os.path.realpath(__file__))
    g.temp_directory = tempfile.mkdtemp(prefix='web_backup_')
    if g.args.message_output_filename is None:
        g.message_output_filename = g.temp_directory + '/messages_' + \
            datetime.datetime.now().strftime('%Y%m%d%H%M%S') + '.log'
    else:
        g.message_output_filename = g.args.message_output_filename
    util.set_logger(message_level, g.message_output_filename, os.path.basename(__file__))

    g.websites = util.get_websites()
    if g.args.website_name is None or g.args.website_name not in g.websites.keys():
        if g.args.website_name is None:
            print 'NOTE: --website-name of website to backup was not specified.'
        else:
            print 'NOTE: Specified website \'' + g.args.website_name + '\' is not a valid website on this server.'
        print 'Here\'s a list of websites configured on this server.'
        print util.print_websites(g.websites)
        util.sys_exit(0)
    g.website_directory = g.websites[g.args.website_name]['document_root']

    # Don't do work that'd just get deleted
    if not g.args.post_to_s3 and g.args.delete_zip:
        message_error('Does not make sense to create zip file and delete it without posting to AWS S3. Aborting!')
        util.sys_exit(1)

    # Load AWS creds which are used for checking need for backup and posting backup file
    g.aws_access_key_id = util.get_ini_setting('aws', 'access_key_id', False)
    g.aws_secret_access_key = util.get_ini_setting('aws', 'secret_access_key', False)
    g.aws_region_name = util.get_ini_setting('aws', 'region_name', False)
    if g.args.aws_s3_bucket_name is not None:
        g.aws_s3_bucket_name = g.args.aws_s3_bucket_name
    else:
        g.aws_s3_bucket_name = util.get_ini_setting('aws', 's3_bucket_name', False)
    if g.args.zip_file_password is not None:
        g.zip_file_password = g.args.zip_file_password
    else:
        g.zip_file_password = util.get_ini_setting('zip_file', 'password', False)

    # Call the base directory the name of the website
    website_name = os.path.basename(g.website_directory)

    # Start with assumption no backups to do
    backups_to_do = None

    # If user specified just to show work to be done (backups to do), calculate, display, and exit
    if g.args.show_backups_to_do:
        backups_to_do = get_backups_to_do(website_name)
        if backups_to_do is None:
            message_info('Backups in S3 are already up-to-date. Nothing to do')
            util.sys_exit(0)
        else:
            message_info('There are backups/deletions to do')
            message_info('Backup plan details: ' + str(backups_to_do))
            util.sys_exit(0)

    # See if there are backups to do
    backups_to_do = get_backups_to_do(website_name)

    # If we're posting to S3 and deleting the ZIP file, then utility has been run only for purpose of
    # posting to S3. See if there are posts to be done and exit if not
    if g.args.post_to_s3 and g.args.delete_zip and backups_to_do is None:
        message_info('Backups in S3 are already up-to-date. Nothing to do. Exiting!')
        util.sys_exit(0)

    # Create ZIP file of website files
    output_filename = g.temp_directory + '/files.zip'
    os.chdir(g.website_directory)
    web_files = os.listdir(g.website_directory)
    if len(web_files) == 0:
        message_info('No files in directory ' + g.website_directory + '. Nothing to back up. Aborting.')
        util.sys_exit(1)
    exec_zip_list = ['/usr/bin/zip', '-r', output_filename, '.']
    message_info('Zipping website files directory')
    FNULL = open(os.devnull, 'w')
    exit_status = subprocess.call(exec_zip_list, stdout=FNULL)
    if exit_status == 0:
        message_info('Successfully zipped web directory to ' + output_filename)
    else:
        message_warning('Error running zip. Exit status ' + str(exit_status))

    # Create .sql dump file from website's WordPress database (if applicable)
    wp_config_filename = g.website_directory + '/wp-config.php'
    if os.path.isfile(wp_config_filename):
        output_filename = g.temp_directory + '/database.sql'
        dict_db_info = get_wp_database_defines(wp_config_filename,
            ['DB_NAME', 'DB_USER', 'DB_PASSWORD', 'DB_HOST'])
        message_info('Dumping WordPress MySQL database named ' + dict_db_info['DB_NAME'])
        mysqldump_string = '/bin/mysqldump -h ' + dict_db_info['DB_HOST'] + ' -u ' + dict_db_info['DB_USER'] + \
            ' -p' + dict_db_info['DB_PASSWORD'] + ' ' + dict_db_info['DB_NAME'] + ' --add-drop-table -r ' + \
            output_filename
        try:
            exec_output = subprocess.check_output(mysqldump_string, stderr=subprocess.STDOUT, shell=True)
        except subprocess.CalledProcessError as e:
            print 'mysqldump exited with error status ' + str(e.returncode) + ' and error: ' + e.output
            util.sys_exit(1)

    # Generate final results output zip filename
    if g.args.output_filename is not None:
        output_filename = g.args.output_filename
    elif g.args.delete_zip:
        # We're deleting it when we're done, so we don't care about its location/name. Grab temp filename
        tmp_file = tempfile.NamedTemporaryFile(prefix='web_backup_', suffix='.zip', delete=False)
        output_filename = tmp_file.name
        tmp_file.close()
        os.remove(output_filename)
        message_info('Temp filename used for final results zip output: ' + output_filename)
    else:
        output_filename = script_directory + '/tmp/' + website_name + '_' + \
            datetime.datetime.now().strftime('%Y%m%d%H%M%S') + '.zip'

    # Zip together results files to create final encrypted zip file
    exec_zip_list = ['/usr/bin/zip', '-P', g.zip_file_password, '-j', '-r', output_filename,
        g.temp_directory + '/']
    message_info('Zipping results files together')
    exit_status = subprocess.call(exec_zip_list, stdout=FNULL)
    if exit_status == 0:
        message_info('Successfully zipped all results to temporary file ' + output_filename)
    else:
        message_error('Error running zip. Exit status ' + str(exit_status))
        util.sys_exit(1)

    # Push ZIP file into appropriate schedule folders (daily, weekly, monthly, etc.) and then delete excess
    # backups in each folder
    list_completed_backups = []
    if 'notification_emails' in vars(g.args):
        list_notification_emails = g.args.notification_emails
    else:
        list_notification_emails = None
    if g.args.post_to_s3 and backups_to_do is not None:
        for folder_name in backups_to_do:
            if backups_to_do[folder_name]['do_backup']:
                s3_key = upload_to_s3(website_name, folder_name, output_filename)
                expiry_days = {'daily': 1, 'weekly': 7, 'monthly': 31}[folder_name]
                expiring_url = gen_s3_expiring_url(s3_key, expiry_days)
                message_info('Backup URL ' + expiring_url + ' is valid for ' + str(expiry_days) + ' days')
                list_completed_backups.append([folder_name, expiring_url, expiry_days])
            for item_to_delete in backups_to_do[folder_name]['files_to_delete']:
                delete_from_s3(item_to_delete)
        if list_notification_emails is not None:
            send_email_notification(list_completed_backups, list_notification_emails)

    # If user asked to retain the temp directory, don't delete it! Else, delete it
    if g.args.retain_temp_directory:
        message_info('Retained temporary output directory ' + g.temp_directory)
    else:
        shutil.rmtree(g.temp_directory)
        message_info('Temporary output directory deleted')

    # If user requested generated zip file be deleted, delete it
    if g.args.delete_zip:
        os.remove(output_filename)
        message_info('Output final results zip file deleted')

    # If it's a WordPress site and user requested it, run the /root/bin/update_and_secure_wp utility after
    # the backup is complete
    if 'wordpress_database' in g.websites[g.args.website_name] and g.args.update_and_secure_wp:
        message_info('Updating and (re)securing WordPress after backup as requested')
        try:
            exec_output = subprocess.check_output('/root/bin/update_and_secure_wp ' + g.website_directory,
                stderr=subprocess.STDOUT, shell=True)
        except subprocess.CalledProcessError as e:
            print '/root/bin/update_and_secure_wp utility exited with error status ' + str(e.returncode) + \
                ' and error: ' + e.output
            util.sys_exit(1)

    message_info('Done!')

    util.sys_exit(0)
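# Example invocation of the website backup driver above, e.g. from a nightly cron job. Illustrative only: the
# script filename web_backup.py is assumed (it matches the web_backup.ini it reads), and 'example.org' stands in
# for a website name that util.get_websites() would list.
#
#   python web_backup.py --website-name example.org --post-to-s3 --delete-zip --update-and-secure-wp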
def main(argv):
    global g

    parser = argparse.ArgumentParser()
    parser.add_argument('--input-filename', required=False, help='Name of input XML file from previous ' +
        'group_profiles XML retrieval. If not specified, groups XML data is retrieved from CCB REST API.')
    parser.add_argument('--output-groups-filename', required=False, help='Name of CSV output file listing group ' +
        'information. Defaults to ./tmp/groups_[datetime_stamp].csv')
    parser.add_argument('--output-participants-filename', required=False, help='Name of CSV output file listing ' +
        'group participant information. Defaults to ./tmp/group_participants_[datetime_stamp].csv')
    parser.add_argument('--message-output-filename', required=False, help='Filename of message output file. If ' +
        'unspecified, defaults to stderr')
    parser.add_argument('--keep-temp-file', action='store_true', help='If specified, temp file created with XML ' +
        'from REST API call is not deleted')
    g.args = parser.parse_args()

    message_level = util.get_ini_setting('logging', 'level')
    util.set_logger(message_level, g.args.message_output_filename, os.path.basename(__file__))

    ccb_subdomain = util.get_ini_setting('ccb', 'subdomain', False)
    ccb_api_username = util.get_ini_setting('ccb', 'api_username', False)
    ccb_api_password = util.get_ini_setting('ccb', 'api_password', False)

    # Set groups and participant filenames and test validity
    if g.args.output_groups_filename is not None:
        output_groups_filename = g.args.output_groups_filename
    else:
        output_groups_filename = './tmp/groups_' + datetime.datetime.now().strftime('%Y%m%d%H%M%S') + '.csv'
    util.test_write(output_groups_filename)
    if g.args.output_participants_filename is not None:
        output_participants_filename = g.args.output_participants_filename
    else:
        output_participants_filename = './tmp/group_participants_' + \
            datetime.datetime.now().strftime('%Y%m%d%H%M%S') + '.csv'
    util.test_write(output_participants_filename)

    if g.args.input_filename is not None:
        # Pull groups XML from input file specified by user
        input_filename = g.args.input_filename
    else:
        input_filename = util.ccb_rest_xml_to_temp_file(ccb_subdomain, 'group_profiles', ccb_api_username,
            ccb_api_password)
        if input_filename is None:
            logging.error('Could not retrieve group_profiles, so aborting!')
            util.sys_exit(1)

    # Properties to peel off each 'group' node in XML
    list_group_props = [
        'name', 'description', 'campus', 'group_type', 'department', 'area', 'group_capacity', 'meeting_day',
        'meeting_time', 'childcare_provided', 'interaction_type', 'membership_type', 'notification', 'listed',
        'public_search_listed', 'inactive'
    ]

    participant_nodes = [
        'ccb_api/response/groups/group/director',
        'ccb_api/response/groups/group/coach',
        'ccb_api/response/groups/group/main_leader',
        'ccb_api/response/groups/group/leaders/leader',
        'ccb_api/response/groups/group/participants/participant'
    ]

    path = []
    dict_path_ids = {}
    group_id = None

    logging.info('Creating groups and group participants output files.')

    with open(output_groups_filename, 'wb') as csv_output_groups_file:
        csv_writer_groups = csv.writer(csv_output_groups_file)
        csv_writer_groups.writerow(['id'] + list_group_props)
        with open(output_participants_filename, 'wb') as csv_output_participants_file:
            csv_writer_participants = csv.writer(csv_output_participants_file)
            csv_writer_participants.writerow(['group_id', 'participant_id', 'participant_type'])
            for event, elem in ElementTree.iterparse(input_filename, events=('start', 'end')):
                if event == 'start':
                    path.append(elem.tag)
                    full_path = '/'.join(path)
                    if full_path == 'ccb_api/response/groups/group':
                        current_group_id = elem.attrib['id']
                elif event == 'end':
                    if full_path == 'ccb_api/response/groups/group':
                        # Emit 'groups' row
                        props_csv = util.get_elem_id_and_props(elem, list_group_props)
                        csv_writer_groups.writerow(props_csv)
                        elem.clear()  # Throw away 'group' node from memory when done processing it
                    elif full_path in participant_nodes:
                        # Emit 'group_participants' row
                        props_csv = [current_group_id, elem.attrib['id'], elem.tag]
                        csv_writer_participants.writerow(props_csv)
                    path.pop()
                    full_path = '/'.join(path)

    logging.info('Groups written to ' + output_groups_filename)
    logging.info('Group Participants written to ' + output_participants_filename)

    # If caller didn't specify input filename, then delete the temporary file we retrieved into
    if g.args.input_filename is None:
        if g.args.keep_temp_file:
            logging.info('Temporary downloaded XML retained in file: ' + input_filename)
        else:
            os.remove(input_filename)

    util.sys_exit(0)
def main(argv):
    global g

    parser = argparse.ArgumentParser()
    parser.add_argument('--output-filename', required=False,
        help='Output CSV filename. Defaults to ./tmp/individuals_[datetime_stamp].csv')
    parser.add_argument('--message-output-filename', required=False, help='Filename of message output file. If ' +
        'unspecified, defaults to stderr')
    g.args = parser.parse_args()

    message_level = util.get_ini_setting('logging', 'level')
    util.set_logger(message_level, g.args.message_output_filename, os.path.basename(__file__))

    ccb_app_username = util.get_ini_setting('ccb', 'app_username', False)
    ccb_app_password = util.get_ini_setting('ccb', 'app_password', False)
    ccb_subdomain = util.get_ini_setting('ccb', 'subdomain', False)

    curr_date_str = datetime.datetime.now().strftime('%m/%d/%Y')

    individual_detail_report_info = {
        'id': '',
        'type': 'export_individuals_change_log',
        'print_type': 'export_individuals',
        'query_id': '',
        'campus_ids': ['1']
    }

    individual_detail_request = {
        'request': json.dumps(individual_detail_report_info),
        'output': 'export'
    }

    with requests.Session() as http_session:
        util.login(http_session, ccb_subdomain, ccb_app_username, ccb_app_password)

        # Pull back complete CSV containing detail info for every individual in CCB database
        output_csv_header = None
        if g.args.output_filename is not None:
            output_filename = g.args.output_filename
        else:
            output_filename = './tmp/individuals_' + datetime.datetime.now().strftime('%Y%m%d%H%M%S') + '.csv'
        util.test_write(output_filename)
        with open(output_filename, 'wb') as csv_output_file:
            csv_writer = csv.writer(csv_output_file)
            logging.info('Note that it takes CCB a minute or two to retrieve all individual information')
            individual_detail_response = http_session.post('https://' + ccb_subdomain + '.ccbchurch.com/report.php',
                data=individual_detail_request)
            individual_detail_succeeded = False
            if individual_detail_response.status_code == 200 and \
                    individual_detail_response.text[:16] == '"Individual ID",':
                individual_detail_succeeded = True
                csv_reader = csv.reader(StringIO.StringIO(
                    individual_detail_response.text.encode('ascii', 'ignore')))
                for row in csv_reader:
                    csv_writer.writerow(row)
            if not individual_detail_succeeded:
                logging.error('Individual Detail retrieval failed')
                util.sys_exit(1)
            else:
                logging.info('Individual info successfully retrieved into file ' + output_filename)

    util.sys_exit(0)
def Quit():
    global g

    if not g.args.live_window:
        g.driver.quit()

    util.sys_exit(0)
def main(argv):
    global g

    # Determine which data sets we're backing up
    g.backup_data_sets_dict = {
        "individuals": [True, None],
        "groups": [True, "participants"],
        "attendance": [True, "events"],
        "pledges": [True, None],
        "contributions": [True, None],
    }
    backup_data_sets_str = " ".join([x.upper() for x in g.backup_data_sets_dict])

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--output-filename",
        required=False,
        help="Output ZIP filename. Defaults to ./tmp/ccb_backup_[datetime_stamp].zip",
    )
    parser.add_argument(
        "--message-output-filename",
        required=False,
        help="Filename of message output file. If "
        + "unspecified, then messages are written to stderr as well as into the messages_[datetime_stamp].log file "
        + "that is zipped into the resulting backup file.",
    )
    parser.add_argument(
        "--post-to-s3",
        action="store_true",
        help="If specified, then the created zip file is "
        + "posted to Amazon AWS S3 bucket (using bucket URL and password in ccb_backup.ini file)",
    )
    parser.add_argument(
        "--delete-zip",
        action="store_true",
        help="If specified, then the created zip file is " + "deleted after posting to S3",
    )
    parser.add_argument(
        "--source-directory",
        required=False,
        help="If provided, then get_*.py utilities are not "
        + "executed to create new output data, but instead files in this specified directory are used "
        + "to zip and optionally post to AWS S3",
    )
    parser.add_argument(
        "--retain-temp-directory",
        action="store_true",
        help="If specified, the temp directory " + "with output from get_*.py utilities is not deleted",
    )
    parser.add_argument(
        "--show-backups-to-do",
        action="store_true",
        help="If specified, the ONLY thing that is "
        + "done is backup posts and deletions to S3 are calculated and displayed",
    )
    parser.add_argument(
        "--all-time",
        action="store_true",
        help="Normally, attendance data is only archived for "
        + "current year (figuring earlier backups covered earlier years). But specifying this flag collects "
        "attendance data not just for this year but across all years",
    )
    parser.add_argument(
        "--backup-data-sets",
        required=False,
        nargs="*",
        default=argparse.SUPPRESS,
        help="If unspecified, *all* CCB data is backed up. If specified, then one or more of the following "
        "data sets must be specified and only the specified data sets are backed up: " + backup_data_sets_str,
    )
    parser.add_argument(
        "--zip-file-password",
        required=False,
        help="If provided, overrides password used to encrypt "
        "zip file that is created that was specified in ccb_backup.ini",
    )
    parser.add_argument(
        "--aws-s3-bucket-name",
        required=False,
        help="If provided, overrides AWS S3 bucket where " "output backup zip files are stored",
    )
    parser.add_argument(
        "--notification-emails",
        required=False,
        nargs="*",
        default=argparse.SUPPRESS,
        help="If specified, list of email addresses that are emailed upon successful upload to AWS S3, along with "
        "accessor link to get at the backup zip file (which is encrypted)",
    )
    g.args = parser.parse_args()

    g.program_filename = os.path.basename(__file__)
    if g.program_filename[-3:] == ".py":
        g.program_filename = g.program_filename[:-3]

    message_level = util.get_ini_setting("logging", "level")

    g.temp_directory = tempfile.mkdtemp(prefix="ccb_backup_")
    if g.args.message_output_filename is None:
        g.message_output_filename = (
            g.temp_directory + "/messages_" + datetime.datetime.now().strftime("%Y%m%d%H%M%S") + ".log"
        )
    else:
        g.message_output_filename = g.args.message_output_filename
    util.set_logger(message_level, g.message_output_filename, os.path.basename(__file__))

    # If specified, validate list of backup_data_sets that we're backing up
    if "backup_data_sets" in vars(g.args):
        # If specifying individual data sets to backup, start assuming we're backing up none of them
        for data_set_name in g.backup_data_sets_dict:
            g.backup_data_sets_dict[data_set_name][0] = False
        for backup_data_set in g.args.backup_data_sets:
            backup_data_set_str = backup_data_set.lower()
            if backup_data_set_str not in g.backup_data_sets_dict:
                message_error(
                    "Specified --backup-data-sets value '"
                    + backup_data_set
                    + "' must be one of: "
                    + backup_data_sets_str
                    + ". Aborting!"
                )
                sys.exit(1)
            else:
                g.backup_data_sets_dict[backup_data_set_str][0] = True

    # Don't do work that'd just get deleted
    if not g.args.post_to_s3 and g.args.delete_zip:
        message_error("Does not make sense to create zip file and delete it without posting to AWS S3. Aborting!")
        util.sys_exit(1)

    # Load AWS creds which are used for checking need for backup and posting backup file
    g.aws_access_key_id = util.get_ini_setting("aws", "access_key_id", False)
    g.aws_secret_access_key = util.get_ini_setting("aws", "secret_access_key", False)
    g.aws_region_name = util.get_ini_setting("aws", "region_name", False)
    if g.args.aws_s3_bucket_name is not None:
        g.aws_s3_bucket_name = g.args.aws_s3_bucket_name
    else:
        g.aws_s3_bucket_name = util.get_ini_setting("aws", "s3_bucket_name", False)
    if g.args.zip_file_password is not None:
        g.zip_file_password = g.args.zip_file_password
    else:
        g.zip_file_password = util.get_ini_setting("zip_file", "password", False)

    # Start with assumption no backups to do
    backups_to_do = None

    # If user specified just to show work to be done (backups to do), calculate, display, and exit
    if g.args.show_backups_to_do:
        backups_to_do = get_backups_to_do()
        if backups_to_do is None:
            message_info("Backups in S3 are already up-to-date. Nothing to do")
            util.sys_exit(0)
        else:
            message_info("There are backups/deletions to do")
            message_info("Backup plan details: " + str(backups_to_do))
            util.sys_exit(0)

    # See if there are backups to do
    backups_to_do = get_backups_to_do()

    # If we're posting to S3 and deleting the ZIP file, then utility has been run only for purpose of
    # posting to S3. See if there are posts to be done and exit if not
    if g.args.post_to_s3 and g.args.delete_zip and backups_to_do is None:
        message_info("Backups in S3 are already up-to-date. Nothing to do. Exiting!")
        util.sys_exit(0)

    # If user specified a directory with set of already-created get_*.py utilities output files to use, then
    # do not run get_*.py data collection utilities, just use that
    if g.args.source_directory is not None:
        g.temp_directory = g.args.source_directory
    else:
        # Run get_XXX.py utilities into datetime_stamped CSV output files and messages_output.log output in
        # temp directory
        g.run_util_errors = []
        for data_set_name in g.backup_data_sets_dict:
            if g.backup_data_sets_dict[data_set_name][0]:
                run_util(data_set_name, g.backup_data_sets_dict[data_set_name][1])
        message_info("Finished all data collection")

    # Create output ZIP file
    if g.args.output_filename is not None:
        output_filename = g.args.output_filename
    elif g.args.delete_zip:
        # We're deleting it when we're done, so we don't care about its location/name. Grab temp filename
        tmp_file = tempfile.NamedTemporaryFile(prefix="ccb_backup_", suffix=".zip", delete=False)
        output_filename = tmp_file.name
        tmp_file.close()
        os.remove(output_filename)
        print "Temp filename: " + output_filename
    else:
        output_filename = "./tmp/ccb_backup_" + datetime.datetime.now().strftime("%Y%m%d%H%M%S") + ".zip"
    exec_zip_list = ["/usr/bin/zip", "-P", g.zip_file_password, "-j", "-r", output_filename, g.temp_directory + "/"]
    message_info("Zipping data collection results files")
    exit_status = subprocess.call(exec_zip_list)
    if exit_status == 0:
        message_info("Successfully zipped get_*.py utilities output and messages log to " + output_filename)
    else:
        message_warning("Error running zip. Exit status " + str(exit_status))

    # Push ZIP file into appropriate schedule folders (daily, weekly, monthly, etc.) and then delete excess
    # backups in each folder
    list_completed_backups = []
    if "notification_emails" in vars(g.args):
        list_notification_emails = g.args.notification_emails
    else:
        list_notification_emails = None
    if backups_to_do is not None:
        for folder_name in backups_to_do:
            if backups_to_do[folder_name]["do_backup"]:
                s3_key = upload_to_s3(folder_name, output_filename)
                expiry_days = {"daily": 1, "weekly": 7, "monthly": 31}[folder_name]
                expiring_url = gen_s3_expiring_url(s3_key, expiry_days)
                message_info("Backup URL " + expiring_url + " is valid for " + str(expiry_days) + " days")
                list_completed_backups.append([folder_name, expiring_url, expiry_days])
            for item_to_delete in backups_to_do[folder_name]["files_to_delete"]:
                delete_from_s3(item_to_delete)
        if list_notification_emails is not None:
            send_email_notification(list_completed_backups, list_notification_emails)

    # If user specified the source directory, don't delete it! And if user asked to retain the temp directory,
    # don't delete it!
    if g.args.source_directory is None:
        if g.args.retain_temp_directory:
            message_info("Retained temporary output directory " + g.temp_directory)
        else:
            shutil.rmtree(g.temp_directory)
            message_info("Temporary output directory deleted")

    util.sys_exit(0)
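# Example invocation of the CCB backup driver above, e.g. from a nightly cron job. Illustrative only: the
# script filename ccb_backup.py is assumed (it matches the ccb_backup.ini it reads); the data set names come
# from g.backup_data_sets_dict.
#
#   python ccb_backup.py --post-to-s3 --delete-zip --backup-data-sets groups attendance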
def main(argv):
    global g

    parser = argparse.ArgumentParser()
    parser.add_argument('--input-events-filename', required=False, help='Name of input CSV file from previous ' +
        'event occurrences retrieval. If not specified, event list CSV data is retrieved from CCB UI.')
    parser.add_argument('--output-events-filename', required=False, help='Name of CSV output file listing event ' +
        'information. Defaults to ./tmp/events_[datetime_stamp].csv')
    parser.add_argument('--output-attendance-filename', required=False, help='Name of CSV output file listing ' +
        'attendance information. Defaults to ./tmp/attendance_[datetime_stamp].csv')
    parser.add_argument('--message-output-filename', required=False, help='Filename of message output file. If ' +
        'unspecified, defaults to stderr')
    parser.add_argument('--keep-temp-file', action='store_true', help='If specified, temp event occurrences CSV ' +
        'file created with CSV data pulled from CCB UI (event list report) is not deleted so it can be used ' +
        'in subsequent runs')
    parser.add_argument('--all-time', action='store_true', help='Normally, attendance data is only archived for ' +
        'current year (figuring earlier backups covered earlier years). But setting this flag collects ' +
        'attendance data not just for this year but across all years')
    g.args = parser.parse_args()

    message_level = util.get_ini_setting('logging', 'level')
    util.set_logger(message_level, g.args.message_output_filename, os.path.basename(__file__))

    g.ccb_subdomain = util.get_ini_setting('ccb', 'subdomain', False)
    ccb_app_username = util.get_ini_setting('ccb', 'app_username', False)
    ccb_app_password = util.get_ini_setting('ccb', 'app_password', False)
    g.ccb_api_username = util.get_ini_setting('ccb', 'api_username', False)
    g.ccb_api_password = util.get_ini_setting('ccb', 'api_password', False)

    datetime_now = datetime.datetime.now()
    curr_date_str = datetime_now.strftime('%m/%d/%Y')
    if g.args.all_time:
        start_date_str = '01/01/1990'
    else:
        start_date_str = '01/01/' + datetime_now.strftime('%Y')
    logging.info('Gathering attendance data between ' + start_date_str + ' and ' + curr_date_str)

    event_list_info = {
        "id": "",
        "type": "event_list",
        "date_range": "",
        "ignore_static_range": "static",
        "start_date": start_date_str,
        "end_date": curr_date_str,
        "additional_event_types": ["", "non_church_wide_events", "filter_off"],
        "campus_ids": ["1"],
        "output": "csv"
    }

    event_list_request = {
        'request': json.dumps(event_list_info),
        'output': 'export'
    }

    # Set events and attendance filenames and test validity
    if g.args.output_events_filename is not None:
        output_events_filename = g.args.output_events_filename
    else:
        output_events_filename = './tmp/events_' + datetime.datetime.now().strftime('%Y%m%d%H%M%S') + '.csv'
    util.test_write(output_events_filename)
    if g.args.output_attendance_filename is not None:
        output_attendance_filename = g.args.output_attendance_filename
    else:
        output_attendance_filename = './tmp/attendance_' + \
            datetime.datetime.now().strftime('%Y%m%d%H%M%S') + '.csv'
    util.test_write(output_attendance_filename)

    input_filename = util.ccb_rest_xml_to_temp_file(g.ccb_subdomain, 'event_profiles', g.ccb_api_username,
        g.ccb_api_password)
    if input_filename is None:
        logging.error('CCB REST API call for event_profiles failed. Aborting!')
        util.sys_exit(1)

    # Properties to peel off each 'event' node in XML. Also collect event_id, group_id, organizer_id
    list_event_props = [
        'name', 'description', 'leader_notes', 'start_datetime', 'end_datetime', 'timezone',
        'recurrence_description', 'approval_status', 'listed', 'public_calendar_listed'
    ]

    path = []
    dict_list_event_names = defaultdict(list)
    with open(output_events_filename, 'wb') as csv_output_events_file:
        csv_writer_events = csv.writer(csv_output_events_file)
        csv_writer_events.writerow(['event_id'] + list_event_props + ['group_id', 'organizer_id'])  # Header row
        for event, elem in ElementTree.iterparse(input_filename, events=('start', 'end')):
            if event == 'start':
                path.append(elem.tag)
                full_path = '/'.join(path)
                if full_path == 'ccb_api/response/events/event':
                    current_event_id = elem.attrib['id']
            elif event == 'end':
                if full_path == 'ccb_api/response/events/event':
                    # Emit 'events' row
                    props_csv = util.get_elem_id_and_props(elem, list_event_props)
                    event_id = props_csv[0]  # get_elem_id_and_props() puts 'id' prop at index 0
                    name = props_csv[1]  # Cheating here...we know 'name' prop is index 1
                    dict_list_event_names[name].append(event_id)
                    props_csv.append(current_group_id)
                    props_csv.append(current_organizer_id)
                    csv_writer_events.writerow(props_csv)
                    elem.clear()  # Throw away 'event' node from memory when done processing it
                elif full_path == 'ccb_api/response/events/event/group':
                    current_group_id = elem.attrib['id']
                elif full_path == 'ccb_api/response/events/event/organizer':
                    current_organizer_id = elem.attrib['id']
                path.pop()
                full_path = '/'.join(path)

    if g.args.input_events_filename is not None:
        # Pull calendared events CSV from file
        input_filename = g.args.input_events_filename
    else:
        # Create UI user session to pull list of calendared events
        logging.info('Logging in to UI session')
        with requests.Session() as http_session:
            util.login(http_session, g.ccb_subdomain, ccb_app_username, ccb_app_password)

            # Get list of all scheduled events
            logging.info('Retrieving list of all scheduled events. This might take a couple minutes!')
            event_list_response = http_session.post('https://' + g.ccb_subdomain + '.ccbchurch.com/report.php',
                data=event_list_request)
            event_list_succeeded = False
            if event_list_response.status_code == 200:
                event_list_response.raw.decode_content = True
                with tempfile.NamedTemporaryFile(delete=False) as temp:
                    input_filename = temp.name
                    first_chunk = True
                    for chunk in event_list_response.iter_content(chunk_size=1024):
                        if chunk:  # filter out keep-alive new chunks
                            if first_chunk:
                                if chunk[:13] != '"Event Name",':
                                    logging.error('Mis-formed calendared events CSV returned. Aborting!')
                                    util.sys_exit(1)
                                first_chunk = False
                            temp.write(chunk)
                            temp.flush()

    with open(input_filename, 'rb') as csvfile:
        csv_reader = csv.reader(csvfile)
        with open(output_attendance_filename, 'wb') as csv_output_file:
            csv_writer = csv.writer(csv_output_file)
            csv_writer.writerow(['event_id', 'event_occurrence', 'individual_id', 'count'])
            header_row = True
            for row in csv_reader:
                if header_row:
                    header_row = False
                    output_csv_header = row
                    event_name_column_index = row.index('Event Name')
                    attendance_column_index = row.index('Actual Attendance')
                    date_column_index = row.index('Date')
                    start_time_column_index = row.index('Start Time')
                else:
                    # Retrieve attendees for events which have non-zero number of attendees
                    if row[attendance_column_index] != '0':
                        if row[event_name_column_index] in dict_list_event_names:
                            retrieve_attendance(csv_writer, dict_list_event_names[row[event_name_column_index]],
                                row[date_column_index], row[start_time_column_index],
                                row[attendance_column_index])
                        else:
                            logging.warning("Unrecognized event name '" + row[event_name_column_index] + "'")

    # If caller didn't specify input filename, then delete the temporary file we retrieved into
    if g.args.input_events_filename is None:
        if g.args.keep_temp_file:
            logging.info('Temporary downloaded calendared events CSV retained in file: ' + input_filename)
        else:
            os.remove(input_filename)

    logging.info('Event profile data written to ' + output_events_filename)
    logging.info('Attendance data written to ' + output_attendance_filename)

    util.sys_exit(0)
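# Example usage of the attendance utility above, keeping the downloaded event list CSV on the first run and
# re-using it on later runs to avoid re-pulling the slow event list report from the CCB UI. Illustrative only:
# the script filename get_attendance.py is an assumption based on the get_*.py utilities referenced elsewhere
# in this repo.
#
#   python get_attendance.py --all-time --keep-temp-file
#   python get_attendance.py --all-time --input-events-filename /tmp/<retained_event_list_csv>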