def get(self):
    """Check the process statuses for a given date and notify the admin on failure.

    Reads the 'dateref' request parameter (YYYY-MM-DD), queries
    logs.status_board for that date, emails a failure notification when the
    recorded status is not '0', and writes the tri-state outcome
    (None / True / False) to the daily_report_status table.
    """
    dateref = self.request.get('dateref')

    # Security fix: dateref is user-controlled and interpolated into the SQL
    # string below. Accept only a strict YYYY-MM-DD date to prevent injection.
    try:
        datetime.strptime(dateref, "%Y-%m-%d")
    except ValueError:
        logging.info("Invalid dateref [%s]; expected YYYY-MM-DD.", dateref)
        return

    def prepare_and_send_email():
        # Best-effort notification; skip silently when no recipient is configured.
        to = cfg['notification_email']
        if not to:
            logging.info("No notification email has been setup.")
            return
        sender = 'noreply@{}.appspotmail.com'.format(project_id)
        subject = 'A BVI process has failed.'
        image_link = "https://{}.appspot.com/images/google-cloud.png".format(project_id)
        template_values = {
            'project': project_id,
            'day': dateref,
            'image_link': image_link
        }
        template_path = 'email_templates/processfailure.html'
        send_email(template_values, template_path, subject, sender, to)

    scopes = cfg['scopes']['big_query']
    project_id = cfg['ids']['project_id']
    bigquery = createBigQueryService(scopes, 'bigquery', 'v2')
    logging.info('Checking the processes for [{}]'.format(dateref))
    query = "SELECT report_date, status FROM logs.status_board WHERE report_date = \'" + dateref + "\'"
    result = fetch_big_query_data(bigquery, project_id, query, 10)
    if 'rows' in result:
        rows = convert_big_query_result(result, DAILY_STATUS)
    # 'result' is reused as the tri-state outcome from here on:
    # None = no data, True = all processes OK, False = failure (email sent).
    if 'rows' not in result or len(rows) == 0:
        result = None
        logging.info("There is no result for daily status so there is nothing to do.")
    elif dateref == rows[0]['report_date'] and rows[0]['status'] == '0':
        result = True
        logging.info("All processes for the day[{}] went well, so no email to be sent.".format(dateref))
    else:
        result = False
        logging.info("There is something wrong so an email will be sent to the admin.")
        prepare_and_send_email()
    try:
        logging.info("Writing daily run results to BQ for the day[{}]...".format(dateref))
        row = [{u'executionTime': datetime.utcnow().strftime("%s"),
                u'report_date': dateref,
                u'result': result}]
        writeDatainBigQuery(row, 'daily_report_status')
    except Exception:
        # Best-effort audit write; keep the handler alive but log the traceback
        # instead of hiding the cause (was logging.info with no exc info).
        logging.exception("Error writing daily run results to BQ for the day[{}].".format(dateref))
def get(self):
    """Log row/byte counts for every configured BigQuery table, one day partition at a time.

    Request parameters:
        sDate, eDate: inclusive date range, formatted YYYY-MM-DD.
    """
    logging.info('Create Volume Report')
    sDate = self.request.get('sDate')
    eDate = self.request.get('eDate')
    # Parse each date once instead of splitting the same string three times.
    start_date = date(*map(int, sDate.split('-')))
    end_date = date(*map(int, eDate.split('-')))
    date_delta = timedelta(days=1)

    scopes = cfg['scopes']['big_query']
    project_id = cfg['ids']['project_id']
    bigquery = createBigQueryService(scopes, 'bigquery', 'v2')

    for table in bigquery_setup['tables']:
        logging.info('Start generating volume info for table [{}]'.format(
            table['name']))
        day_date = start_date
        while day_date <= end_date:
            logging.info("Generating volume for date {}".format(
                day_date.strftime("%Y-%m-%d")))
            # BigQuery day-partition decorator: table$YYYYMMDD (single strftime
            # replaces the former format -> split -> re-join round trip).
            table_name_decorator = table['name'] + '$' + day_date.strftime("%Y%m%d")
            table_level = table.get('level', 'raw_data')
            table_type = table['type']
            info = fetch_big_query_table_info(bigquery, project_id,
                                              table['dataset'], table_name_decorator)
            # Empty partitions may omit these fields; report them as '0'.
            num_rows = info.get('numRows', '0')
            num_bytes = info.get('numBytes', '0')
            log_info = "TABLE_INFO: TABLE:{}, TABLE_DECORATOR:{}, LEVEL:{}, " \
                       "TYPE:{}, NUM_ROWS:{}, NUM_BYTES:{}".format(
                           table['name'], table_name_decorator, table_level,
                           table_type, num_rows, num_bytes)
            logging.info(log_info)
            day_date += date_delta
def get(self):
    """Check today's latest daily_status entry and email the admin if the run failed."""

    def prepare_and_send_email():
        # Best-effort notification; skip silently when no recipient is configured.
        to = cfg['notification_email']
        if not to:
            logging.info("No notification email has been setup.")
            return
        sender = 'noreply@{}.appspotmail.com'.format(project_id)
        subject = 'A BVI process has failed.'
        image_link = "https://{}.appspot.com/images/google-cloud.png".format(project_id)
        template_values = {
            'day': day,
            'image_link': image_link
        }
        template_path = 'email_templates/processfailure.html'
        send_email(template_values, template_path, subject, sender, to)

    scopes = cfg['scopes']['big_query']
    project_id = cfg['ids']['project_id']
    bigquery = createBigQueryService(scopes, 'bigquery', 'v2')
    today = date.today()
    day = today.strftime("%Y-%m-%d")
    logging.info('Checking the processes for [{}]'.format(day))
    query = "SELECT date, status, value FROM logs.daily_status ORDER BY date desc LIMIT 1"
    result = fetch_big_query_data(bigquery, project_id, query, 10)
    # Robustness fix: a result without a 'rows' key previously went straight
    # into convert_big_query_result; guard it like the status_board handler.
    rows = convert_big_query_result(result, DAILY_STATUS) if 'rows' in result else []
    if len(rows) == 0:
        # Fix: removed no-op .format(day) calls on messages with no placeholder.
        logging.info("There is no result for daily status so there is nothing to do.")
    elif day == rows[0]['date'] and rows[0]['status'] == 'SUCCESS':
        logging.info("All processes for the day[{}] went well, so no email to be sent.".format(day))
    else:
        logging.info("There is something wrong so an email will be sent to the admin.")
        prepare_and_send_email()
def get(self):
    """Orchestrate the BVI execution pipeline, one step at a time, via task queues.

    Request parameters:
        type: 'daily' (default) or 'historical'.
        step: current pipeline step (key in manager.yaml).
        begin_step: truthy when the step should be started (vs. only monitored).
        dateref: reference date for daily runs (YYYY-MM-DD).
        Sdate / Edate: date range for historical runs.
        auto_recover / enable_auto_recover: auto-recovery flags; arrive as the
            strings 'True'/'False' when set via URL.
    """
    project_id = cfg['ids']['project_id']

    # -- request parameters -------------------------------------------------
    exec_type = self.request.get('type', 'daily')
    step = self.request.get('step')
    begin_step = self.request.get('begin_step')
    dateref = self.request.get('dateref')
    start_date = self.request.get('Sdate')
    end_date = self.request.get('Edate')
    # Normalize the string 'False' (and empty values) to boolean False.
    auto_recover = self.request.get('auto_recover', False)
    if auto_recover == 'False' or not auto_recover:
        auto_recover = False
    enable_auto_recover = self.request.get('enable_auto_recover', True)
    if enable_auto_recover == 'False' or not enable_auto_recover:
        enable_auto_recover = False

    if should_check_for_auto_recover() and enable_auto_recover and exec_type == 'daily' \
            and step == 'first' and begin_step:
        # verifying if an error occurred in the last days, only in every 'frequency' days
        logging.info("[auto-recover] Verifying need to execute auto-recover...")
        bigquery = createBigQueryService(cfg['scopes']['big_query'], 'bigquery', 'v2')
        query = "SELECT MIN(min_date) as first_fail FROM [{}:logs.errors_dashboard]".format(project_id)
        result = fetch_big_query_data(bigquery, project_id, query, 10)
        rows = convert_big_query_result(result, ERROR_BEGIN)
        if len(rows) == 1 and rows[0]['first_fail']:
            # Switch to a historical run covering everything since the first failure.
            exec_type = 'historical'
            start_date = rows[0]['first_fail']
            end_date = dateref
            auto_recover = True
            logging.info("[auto-recover] Error occurred in a previous day, moving to historical "
                         "execution to run again since the first failed execution date. "
                         "auto-recover starting from {}".format(start_date))
        else:
            logging.info("[auto-recover] Not needed, no errors found in last {} days.".format(
                cfg['auto_recover']['days_lookback']))

    log_date = dateref
    # Fix: initialize date_params so an unexpected 'type' value cannot raise
    # NameError when the task-queue URLs are built further down.
    date_params = ''
    if exec_type == 'daily':
        date_params = '&dateref={}&enable_auto_recover={}'.format(dateref, str(enable_auto_recover))
    elif exec_type == 'historical':
        date_params = '&Sdate={}&Edate={}&enable_auto_recover={}'.format(
            start_date, end_date, str(enable_auto_recover))
        log_date = start_date

    if step == 'first' and begin_step:
        bvi_log(date=log_date, resource='exec_manager', message_id='start',
                message='Start of BVI {} execution'.format(exec_type))

    # NOTE(review): yaml.load without an explicit Loader is deprecated in
    # PyYAML >= 5.1 and unsafe on untrusted input; manager.yaml is a local
    # config file, but prefer yaml.safe_load when upgrading dependencies.
    with open('manager.yaml', 'r') as mgrymlfile:
        mgr = yaml.load(mgrymlfile)

    exec_manager_queue = cfg['queues']['exec_manager']
    if begin_step:
        if exec_type == 'daily':
            bvi_log(date=log_date, resource=get_log_step(step), message_id='start',
                    message='Start of {} step'.format(step))
            endpoint = mgr[step]['endpoint'].replace('from_cron', dateref)
            taskqueue.add(queue_name=exec_manager_queue, url=endpoint, method='GET')
        elif exec_type == 'historical':
            exec_historical(mgr, step, start_date, end_date)
        # wait for tasks to be created in the queue
        time.sleep(15)

    count_tasks = count_queued_tasks(mgr[step].get('queues'))
    if count_tasks == 0 and 'next_step' in mgr[step] and mgr[step]['next_step']:
        # Finished all tasks from this step
        if auto_recover and 'missing_data_table' in mgr[step]:
            # Check if the auto-recover was successful
            logging.info("[auto-recover] Checking for effectiveness...")
            lookback_date_obj = date.today() - timedelta(days=cfg['auto_recover']['days_lookback'])
            lookback_date = lookback_date_obj.strftime("%Y-%m-%d")
            bigquery = createBigQueryService(cfg['scopes']['big_query'], 'bigquery', 'v2')
            check_query = "SELECT MIN(report_date) AS report_date FROM [{}:{}] " \
                          "WHERE report_date > \'{}\'".format(project_id,
                                                              mgr[step]['missing_data_table'],
                                                              lookback_date)
            check_result = fetch_big_query_data(bigquery, project_id, check_query, 10)
            if 'rows' in check_result:
                check_rows = convert_big_query_result(check_result, CHECK_ROW)
                if len(check_rows) == 1 and check_rows[0]['report_date']:
                    min_error_date = check_rows[0]['report_date']
                    min_error_date_obj = datetime.strptime(min_error_date, "%Y-%m-%d").date()
                    start_date_obj = datetime.strptime(start_date, "%Y-%m-%d").date()
                    if min_error_date_obj > start_date_obj:
                        logging.info(
                            "[auto-recover] Min error date for '{}' ".format(mgr[step]['missing_data_table']) +
                            "is greater than start_date, auto-recover should proceed.")
                    else:
                        # Auto-recover did not help: revert to a plain daily run.
                        logging.info("[auto-recover] Could not fix any missing data for '{}'. ".format(
                            mgr[step]['missing_data_table']) +
                            "Reverting to daily ({}) execution.".format(end_date))
                        exec_type = 'daily'
                        date_params = '&dateref={}&enable_auto_recover=False'.format(end_date)
                        auto_recover = False
                else:
                    logging.info(
                        "[auto-recover] No missing data for '{}', auto-recover should proceed.".format(
                            mgr[step]['missing_data_table']))
            else:
                logging.info(
                    "[auto-recover] No missing data for '{}', auto-recover should proceed.".format(
                        mgr[step]['missing_data_table']))
            logging.info("[auto-recover] Finished checking for effectiveness.")

        # Execute next step
        bvi_log(date=log_date, resource=get_log_step(step), message_id='end',
                message='End of {} step'.format(get_log_step(step)))
        taskqueue.add(queue_name=exec_manager_queue,
                      url='/exec_manager?type={}{}&step={}&begin_step=True&auto_recover={}'.format(
                          exec_type, date_params, mgr[step]['next_step'], auto_recover),
                      method='GET')
    elif count_tasks > 0:
        # Still executing tasks, just schedule to monitor task queues again 10 seconds later
        logging.info("Waiting for tasks to finish...")
        taskqueue.add(queue_name=exec_manager_queue,
                      url='/exec_manager?type={}{}&step={}&auto_recover={}'.format(
                          exec_type, date_params, step, auto_recover),
                      method='GET', countdown=10)
    else:
        # Finished ALL tasks
        bvi_log(date=log_date, resource='exec_manager', message_id='end',
                message='End of BVI {} execution'.format(exec_type))
def get(self):
    """Dispatch BigQuery administration operations selected by the 'op' request parameter.

    Supported ops: list_projects, create_datasets, create_tables,
    create_custom_schemas, create_survey_tables, create_logs_tables,
    create_billing_view, update, custom_update, __dir__ (index page).
    """
    op = self.request.get('op')
    scopes = cfg['scopes']['big_query']
    bigquery = createBigQueryService(scopes, 'bigquery', 'v2')
    if op == "list_projects":
        self.response.write(list_projects(bigquery))
    elif op == "create_datasets":
        for dataset_def in bigquery_setup['datasets']:
            try:
                destination_dataset = dataset_def['name']
                # Idiom fix: 'not exists_dataset(...)' instead of '== False'.
                if not exists_dataset(
                        bigquery,
                        cfg['ids']['project_id'],
                        destination_dataset=destination_dataset,
                        num_retries=5):
                    try:
                        create_dataset(
                            bigquery,
                            cfg['ids']['project_id'],
                            description=dataset_def['description'],
                            destination_dataset=destination_dataset,
                            num_retries=5)
                        salida = "<b>{destination_dataset}</b> created <br>".format(
                            destination_dataset=destination_dataset,
                        )
                        self.response.write(salida)
                    # Fix: the two byte-identical handlers for gHttpError and
                    # HTTPError are merged into a single tuple handler; the
                    # unused 'err' binding is dropped.
                    except (gHttpError, HTTPError):
                        self.response.write(
                            "Cant create {dataset}".format(dataset=destination_dataset)
                        )
                        return
                else:
                    salida = "<b>{destination_dataset}</b> already exists <br><hr/>".format(
                        destination_dataset=destination_dataset,
                    )
                    self.response.write(salida)
            except HTTPError as err:
                self.response.write('Error in get: %s' % err.content)
    elif op == "create_tables":
        create_tables_from_list(self=self, bigquery=bigquery,
                                folder=bigquery_setup['folder'],
                                tables_list=bigquery_setup['tables'], op=op)
    elif op == "create_custom_schemas":
        create_tables_from_list(self=self, bigquery=bigquery,
                                folder=bigquery_custom_schemas_setup['folder'],
                                tables_list=bigquery_custom_schemas_setup['tables'], op=op)
    elif op == "create_survey_tables":
        create_tables_from_list(self=self, bigquery=bigquery,
                                folder=bigquery_survey_setup['folder'],
                                tables_list=bigquery_survey_setup['tables'], op=op)
    elif op == "create_logs_tables":
        create_tables_from_list(self=self, bigquery=bigquery,
                                folder=bigquery_logs_setup['folder'],
                                tables_list=bigquery_logs_setup['tables'], op=op)
    elif op == "create_billing_view":
        # NOTE(review): pairs the logs folder with the billing table list —
        # looks intentional (views defined alongside logs), but confirm.
        create_tables_from_list(self=self, bigquery=bigquery,
                                folder=bigquery_logs_setup['folder'],
                                tables_list=bigquery_billing_setup['tables'], op=op)
    elif op == "update":
        update_data_level(self, bigquery, 'update', bigquery_setup)
    elif op == "custom_update":
        if not custom_fields_empty(bigquery):
            update_data_level(self, bigquery, 'custom_update', bigquery_custom_schemas_setup)
    elif op == "__dir__":
        # Build the index page from one link template instead of eight copies.
        op_names = [
            "list_projects",
            "create_datasets",
            "create_tables",
            "create_survey_tables",
            "create_custom_schemas",
            "create_logs_tables",
            "create_billing_view",
            "update",
        ]
        options = [
            "<a href='{base}?op={op}' target='_blank'>{op}</a>".format(base="/bq_api", op=name)
            for name in op_names
        ]
        salida = "<br/>".join(options)
        self.response.write(salida)
    else:
        # illegal option
        self.response.write("So sorry, but I didn't understand that order")