def upload_trash_can_data(endpoints):
    """Pull data for each trash-can API endpoint and upload it to GCS.

    :param endpoints: iterable of dicts with keys 'path' (str, URL segment),
        'date_param' (truthy when the request must include the execution
        date), and 'filter_list' (optional list of field names to keep)

    Relies on module-level ``BASE_URL``, ``execution_date``, ``bucket`` and
    the helpers ``get_api_date``/``filter_fields``/``json_to_gcs`` defined
    elsewhere in this file.
    """
    for endpoint in endpoints:
        # Build the request URL; date-scoped endpoints append the converted
        # execution date after the API key.
        req_url = BASE_URL + '/' + endpoint['path'] + '/' + os.environ['TRASH_CAN_KEY']
        if endpoint['date_param']:
            req_url = req_url + '/' + get_api_date(execution_date)
        res = requests.get(req_url)

        if endpoint['filter_list']:
            # Keep only the fields named in filter_list.
            data = filter_fields(res.json(),
                                 relevant_fields=endpoint['filter_list'],
                                 add_fields=False)
        else:
            # BUG FIX: the original uploaded `data` here without assigning it
            # in this branch (NameError on the first iteration, stale data
            # from a previous endpoint afterwards); upload the unfiltered
            # response body instead.
            data = res.json()

        # Upload to GCS, partitioned by execution year/month.
        json_to_gcs(
            '{}/{}/{}/{}_{}.json'.format(endpoint['path'].lower(),
                                         execution_date.split('-')[0],
                                         execution_date.split('-')[1],
                                         execution_date,
                                         endpoint['path'].lower()),
            data, bucket)
get_blotter_offenses (a.arrest_id, 'ARREST') AS Offenses, CASE WHEN is_sex_Crime (A.ARREST_ID, 'ARREST') = 'Y' THEN get_arr_incident_Zone_sexCrime (a.incident_nbr) ELSE get_arr_incident_address (a.incident_nbr) END AS IncidentLocation, CASE WHEN is_sex_Crime (A.ARREST_ID, 'ARREST') = 'Y' THEN NULL ELSE get_arr_incident_Neighborhood (a.incident_nbr) END AS IncidentNeighborhood, get_arr_incident_Zone (a.incident_nbr) AS IncidentZone, CASE WHEN is_sex_Crime (A.ARREST_ID, 'ARREST') = 'Y' THEN NULL ELSE get_arr_incident_Census (a.incident_nbr) END AS INCIDENTTRACT FROM arrest a, address ad WHERE a.arrest_id = ad.parent_table_id AND ad.parent_table_name = 'ARREST' AND ad.parent_Table_column = 'ARREST_ID' AND ad.address_type = 'ARREST' """
# NOTE(review): the text above is the tail of the triple-quoted Oracle
# blotter_query opened earlier in the file. For sex-crime arrests the
# location is generalized to the zone-level helper and neighborhood/tract
# are nulled.

# Run the query against the open Oracle connection, then release it.
arrests = sql_to_dict_list(conn, blotter_query, db='oracle')
conn.close()

# Upload the arrest records to GCS, partitioned by the year/month of the
# DAG execution date (YYYY-MM-DD).
json_to_gcs(
    '30_day_blotter/{}/{}/{}_post_ucr_arrests.json'.format(
        args['execution_date'].split('-')[0],
        args['execution_date'].split('-')[1],
        args['execution_date']),
    arrests, bucket)
# Destination bucket for the EMS / fire dispatch extracts.
bucket = '{}_ems_fire'.format(os.environ['GCS_PREFIX'])

# Derived/geographic columns the downstream schema does not need.
FIELDS_TO_REMOVE = [
    'city_name', 'city_code', 'geoid', '_the_geom_webmercator', '_geom'
]

# Restrict the WPRDC query to Pittsburgh records from the quarter that
# immediately precedes the DAG execution date.
quarter, year = execution_date_to_prev_quarter(args['execution_date'])
where_clauses = ('"city_name" = \'PITTSBURGH\' AND "call_quarter" = \'{}\' '
                 'AND "call_year" = \'{}\''.format(quarter, year))

ems_calls = get_wprdc_data(resource_id='ff33ca18-2e0c-4cb5-bdcd-60a5dc3c0418',
                           where_clauses=where_clauses,
                           fields_to_remove=FIELDS_TO_REMOVE)
fire_calls = get_wprdc_data(resource_id='b6340d98-69a0-4965-a9b4-3480cea1182b',
                            where_clauses=where_clauses,
                            fields_to_remove=FIELDS_TO_REMOVE)

# Upload both extracts, partitioned by execution year/month.
exec_date = args['execution_date']
yr, mo = exec_date.split('-')[0], exec_date.split('-')[1]
json_to_gcs('ems/{}/{}/{}_ems.json'.format(yr, mo, exec_date),
            ems_calls, bucket)
json_to_gcs('fire/{}/{}/{}_fire.json'.format(yr, mo, exec_date),
            fire_calls, bucket)
'id', 'notify', 'requestId', 'routeId', 'reasonId']
# NOTE(review): the entries above are the tail of a key list opened earlier
# in the file (presumably the activity-key list) — cannot confirm from this
# view.

# Fetch changed Qalert requests/activities; `payload` and `headers` are
# defined earlier in the file.
response = requests.get('https://pittsburghpa.qscend.com/qalert/api/v1/requests/changes',
                        params=payload,
                        headers=headers)
trimmed_requests = []
trimmed_activities = []

# filter responses to take out unnecessary keys, preserving only those we've defined in request/activity_keys
if response.status_code == 200:
    for request in response.json()['request']:
        trimmed_request = {k: request[k] for k in REQUEST_KEYS}
        trimmed_requests.append(trimmed_request)
    for activity in response.json()['activity']:
        trimmed_activity = {k: activity[k] for k in ACTIVITY_KEYS}
        trimmed_activities.append(trimmed_activity)

# Upload both lists, partitioned by current year/month.
# NOTE(review): .lower() on a numeric month string is a no-op — confirm
# whether a month *name* was originally intended here.
json_to_gcs('requests/{}/{}/{}_requests.json'.format(now.strftime('%Y'),
                                                     now.strftime('%m').lower(),
                                                     now.strftime("%Y-%m-%d")),
            trimmed_requests, bucket)
json_to_gcs('activities/{}/{}/{}_activities.json'.format(now.strftime('%Y'),
                                                         now.strftime('%m').lower(),
                                                         now.strftime("%Y-%m-%d")),
            trimmed_activities, bucket)
ON acct.acct_no = biz.acct_no LEFT JOIN(SELECT inn. * FROM (SELECT t2. *, ( Row_number() OVER( partition BY acct_no ORDER BY Decode(address_type, 'CL', 'CA', 'NCL', 'RA', 'RP' , 'TA', 'MA', 'OA', 'PRL')) ) rank FROM PITSDBA.it_tbl_m_taxpayer_contact t2) inn WHERE inn.rank = 1 AND status_address = 'V') con ON acct.acct_no = con.acct_no WHERE acct.acct_status = 'A' AND acct.account_type = 'B' ORDER BY acct.date_created ASC """
# NOTE(review): the text above is the tail of the triple-quoted Oracle
# businesses_query opened earlier in the file; the ROW_NUMBER/rank = 1
# subquery keeps a single valid ('V') contact address per account, for
# active ('A') business ('B') accounts.

# Run the query against the open Oracle connection, then release it.
businesses = sql_to_dict_list(conn, businesses_query, db='oracle')
conn.close()

# Upload the business records to GCS, partitioned by execution year/month.
json_to_gcs(
    'businesses/{}/{}/{}_businesses.json'.format(
        args['execution_date'].split('-')[0],
        args['execution_date'].split('-')[1],
        args['execution_date']),
    businesses, bucket)
# Parse the DAG runtime arguments: current and previous execution dates.
parser = argparse.ArgumentParser()
parser.add_argument('-e', '--execution_date', dest='execution_date',
                    required=True, help='DAG execution date (YYYY-MM-DD)')
parser.add_argument('-p', '--prev_execution_date', dest='prev_execution_date',
                    required=True, help='Prev DAG execution date (YYYY-MM-DD)')
args = vars(parser.parse_args())

bucket = '{}_parking'.format(os.environ['GCS_PREFIX'])

# Meters: full pull, dropping geometry/guid columns.
parking_meters = get_wprdc_data(
    resource_id='9ed126cc-3c06-496e-bd08-b7b6b14b4109',
    fields_to_remove=["_geom", "guid", "_the_geom_webmercator"]
)

# Transactions: incremental pull — only rows starting on or after the
# previous execution date.
parking_transactions = get_wprdc_data(
    resource_id='1ad5394f-d158-46c1-9af7-90a9ef4e0ce1',
    where_clauses='"utc_start" >= \'{}\''.format(args['prev_execution_date']),
    fields_to_remove=["_geom", "_the_geom_webmercator"]
)

# Upload both datasets, partitioned by execution year/month.
exec_date = args['execution_date']
yr, mo = exec_date.split('-')[0], exec_date.split('-')[1]
json_to_gcs('meters/{}/{}/{}_meters.json'.format(yr, mo, exec_date),
            parking_meters, bucket)
json_to_gcs('transactions/{}/{}/{}_transactions.json'.format(yr, mo, exec_date),
            parking_transactions, bucket)
def get_report(report_name, time_conversion, dl_attempts, url, url_dl_base,
               workspace_id, targ_obj_id, url_report_base, workspace_name):
    """Retrieve, format, and upload the finalized Twilio summary report.

    This function calls all helper functions to retrieve, format, and
    finalize the report downloaded from the API. All necessary steps for
    creating the final report are contained within this function.

    :param report_name: list of report names, parallel to targ_obj_id
    :param time_conversion: time-conversion settings for
        format_report_download (copied before use so the caller's dict is
        not mutated)
    :param dl_attempts: maximum number of download attempts
    :param url: dict holding the auth endpoints under keys "sst" and "tt"
    :param url_dl_base: base URL for the report download
    :param workspace_id: report workspace identifier
    :param targ_obj_id: list of report object IDs, parallel to report_name
    :param url_report_base: format template for the report URL ('{}' is
        filled with workspace_id)
    :param workspace_name: GCS path prefix for the uploaded report
    :return: the formatted report as a list of record dicts

    Relies on module-level ``args`` and ``bucket`` plus the helpers
    ``prep_report_download``/``format_report_download``/``json_to_gcs``.
    """
    # Map each report name to its object ID.
    # (Idiom fix: dict(zip(...)) replaces the index-based
    # `for r in range(len(report_name))` loop — same pairing, no indexing.)
    object_id = dict(zip(report_name, targ_obj_id))

    ##
    # define and receive API request for super secure token (sst)
    header_sst = {
        "Accept": "application/json",
        "Content-Type": "application/json"
    }
    payload_sst = {
        "postUserLogin": {
            "login": os.environ['TWILIO_EMAIL'],
            "password": os.environ['TWILIO_API_PW'],
            "remember": 0,
            "verify_level": 2
        }
    }
    response_sst = requests.post(url["sst"], headers=header_sst,
                                 data=json.dumps(payload_sst))
    token = {"sst": response_sst.json()['userLogin']['token']}

    ##
    # define and receive API request for temporary token (tt); the sst token
    # authenticates this call
    header_tt = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "X-GDC-AuthSST": token["sst"]
    }
    response_tt = requests.get(url["tt"], headers=header_tt)
    token.update({"tt": response_tt.json()['userToken']['token']})

    ##
    # define headers for API request for URI and the report download; the tt
    # token travels as a cookie
    header_uri = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Cookie": "GDCAuthTT={}".format(token["tt"])
    }
    header_dl = {"Cookie": "GDCAuthTT={}".format(token["tt"])}

    ##
    # define the API request for the report download, execute the download,
    # and parse the raw download (a byte string literal) into a list
    # (returns the unformatted (raw) report)

    # define the report URL
    url_report = url_report_base.format(workspace_id)
    raw_summary_report = prep_report_download(wkspc_id=workspace_id,
                                              obj=object_id["summary_report"],
                                              url_rep=url_report,
                                              hd_uri=header_uri,
                                              attempts=dl_attempts,
                                              hd_dl=header_dl,
                                              url_dl=url_dl_base)

    ##
    # format the raw summary report (list) into a DF and name its columns
    formatted_summary_report = format_report_download(
        raw_report=raw_summary_report.copy(),
        conv_time=time_conversion.copy())
    formatted_summary_report.columns = [
        "date", "handled_conversations", "voicemails", "median_talk_time",
        "average_talk_time", "total_talk_time"
    ]

    ##
    # convert the DF to a list of record dicts for upload
    formatted_report = formatted_summary_report.to_dict(orient="records")

    ##
    # upload formatted report to Google Cloud, partitioned by execution
    # year/month
    json_to_gcs(
        '{}/{}/{}/{}_{}.json'.format(workspace_name,
                                     args['execution_date'].split('-')[0],
                                     args['execution_date'].split('-')[1],
                                     args['execution_date'],
                                     workspace_name),
        formatted_report, bucket)

    return formatted_report
NVL (get_blotter_zone (c.ccr), 'N/A') ZONE, NVL (GET_UCR_HIERARCHY_DESC (C.REPORT_ID), 'NA') AS HIERARCHY_DESC, GET_OFFENSES (C.REPORT_ID) AS OFFENSES, CASE WHEN is_sex_Crime (c.report_id) = 'Y' THEN NULL ELSE get_blotter_census (c.ccr) END AS tract FROM rpt_control c, rpt_offense_incident o WHERE c.report_name = ('OFFENSE 2.0') AND c.accepted_flag = 'Y' AND TRUNC (o.occurred_to_date) >= TRUNC (SYSDATE) - 30 AND c.report_id = o.report_id AND c.status IN ('REVIEWED AND ACCEPTED', 'REVIEWED AND ACCEPTED WITH CHANGES') """
# NOTE(review): the text above is the tail of the triple-quoted Oracle
# blotter_query opened earlier in the file; it selects accepted
# 'OFFENSE 2.0' reports from the last 30 days, nulling the census tract
# for sex crimes.

# Run the query against the open Oracle connection, then release it.
blotter = sql_to_dict_list(conn, blotter_query, db='oracle')
conn.close()

# Upload the blotter records to GCS, partitioned by execution year/month.
json_to_gcs(
    '30_day_blotter/{}/{}/{}_blotter.json'.format(
        args['execution_date'].split('-')[0],
        args['execution_date'].split('-')[1],
        args['execution_date']),
    blotter, bucket)
# Response fields retained for upload; everything else is discarded.
REQUEST_KEYS = [
    'id', 'master', 'addDateUnix', 'lastActionUnix', 'dept', 'status',
    'typeId', 'typeName', 'priorityValue', 'latitude', 'longitude', 'origin'
]
ACTIVITY_KEYS = ['id', 'requestId', 'actDateUnix', 'code', 'codeDesc']

# Fetch changed Qalert requests/activities; `payload` and `headers` are
# defined earlier in the file.
# NOTE(review): unlike the sibling Qalert script, there is no status-code
# check here — a non-200 response would raise a KeyError below; confirm
# that is intended.
response = requests.get(
    'https://pittsburghpa.qscend.com/qalert/api/v1/requests/changes',
    params=payload,
    headers=headers)

# filter responses to take out unnecessary keys, preserving only those we've defined in request/activity_keys
response_body = response.json()
trimmed_requests = filter_fields(response_body['request'], REQUEST_KEYS)
trimmed_activities = filter_fields(response_body['activity'], ACTIVITY_KEYS)

# Upload both lists, partitioned by execution year/month.
exec_date = args['execution_date']
yr, mo = exec_date.split('-')[0], exec_date.split('-')[1]
json_to_gcs(
    'requests/{}/{}/{}_requests.json'.format(yr, mo, exec_date),
    trimmed_requests, bucket)
json_to_gcs(
    'activities/{}/{}/{}_activities.json'.format(yr, mo, exec_date),
    trimmed_activities, bucket)
    :return: (dict) JSON object with permit data """
    # NOTE(review): the line above closes a docstring opened earlier in the
    # file; the enclosing function appears to be enrich_record(record_id,
    # api_token) based on the call below — confirm against the full file.
    res = requests.get(F"{BASE_URL}/{record_id}",
                       headers={'Authorization': api_token},
                       params={'expand': EXPAND_FIELDS})
    try:
        if res.status_code == 200:
            return res.json()['result'][0]
        elif res.status_code == 401:
            # Token expired: refresh the token and retry this record once.
            # NOTE(review): the refreshed token is local to this call and is
            # not propagated back to the caller — confirm that is intended.
            api_token = get_token()
            res = requests.get(F"{BASE_URL}/{record_id}",
                               headers={'Authorization': api_token},
                               params={'expand': EXPAND_FIELDS})
            return res.json()['result'][0]
    except requests.exceptions.RequestException:
        # NOTE(review): best-effort — on a request failure (and on any other
        # status code) this function falls through and returns None, so the
        # uploaded list may contain None entries; verify downstream handling.
        pass

api_token = get_token()
record_ids = get_all_record_ids(api_token)
# Enrich every record ID via the API (one request per record).
enriched_records = []
for record_id in record_ids:
    enriched_records.append(enrich_record(record_id, api_token))

# Upload the enriched permits to GCS, partitioned by execution year/month.
json_to_gcs(
    'permits/{}/{}/{}_permits.json'.format(
        args['execution_date'].split('-')[0],
        args['execution_date'].split('-')[1],
        args['execution_date']),
    enriched_records, bucket)
# GCS bucket for the community-center attendance extract.
bucket = '{}_community_centers'.format(os.environ['GCS_PREFIX'])

# Connect to the RecPro SQL Server database; credentials come from the
# environment.
conn = pymssql.connect(host=os.environ['RECPRO_DB'],
                       user=os.environ['RECPRO_UN'],
                       password=os.environ['RECPRO_PW'],
                       database='recpro')

# Daily attendance per center: membership-card swipes grouped by calendar
# day (the DATEADD/DATEDIFF pair truncates the timestamp to midnight),
# excluding today and anything on/before 2011-03-06, and rows with no
# center name.
attendance_query = """ SELECT CAST(DATEADD(DAY, DATEDIFF(DAY, 0, MemUse.Date_Time), 0) AS DATE) AS Date, Center.Name as CenterName, COUNT(MemUse.CardNumber) AttendanceCount FROM [recpro].[dbo].[MembershipUse] as MemUse LEFT JOIN [recpro].[dbo].[Facilities] as Center ON MemUse.Location = Center.ID WHERE MemUse.Date_Time < DATEDIFF(DAY, 0, GETDATE()) AND MemUse.Date_Time > '2011-03-06' AND Center.Name IS NOT NULL GROUP BY DATEADD(DAY, DATEDIFF(DAY, 0, MemUse.Date_Time), 0), Center.Name ORDER BY Date DESC; """

# NOTE(review): '%y' formats a two-digit year — confirm '%Y-%m-%d' was not
# intended, since the query compares against a four-digit date literal.
attendance_results = sql_to_dict_list(conn, attendance_query,
                                      date_col='Date',
                                      date_format='%y-%m-%d')
conn.close()

# Upload the attendance rows to GCS, partitioned by execution year/month.
json_to_gcs(
    'attendance/{}/{}/{}_attendance.json'.format(
        args['execution_date'].split('-')[0],
        args['execution_date'].split('-')[1],
        args['execution_date']),
    attendance_results, bucket)
'COMMERCIALORRESIDENTIAL', 'COMPLETEDDATE', 'NOPARKINGAUTHORIZATION',
'DETOUR', 'NUMBEROFDUMPSTERS', 'NUMBEROFMACHINES',
'SPECIALPERMITINSTRUCTIONS', 'APPLICANTCUSTOMEROBJECTID',
'STATUSDESCRIPTION', 'EFFECTIVEDATE', 'EXPIRATIONDATE', 'WORKDATESFROM',
'WORKDATESTO'
]
# NOTE(review): the entries above are the tail of RELEVANT_FIELDS (used
# below), whose opening bracket is earlier in the file.

# Related objects to expand inline in the Computronix OData response.
EXPAND_FIELDS = [
    'ADDRESS', 'LOCATION', 'PERMITTYPE', 'PROJECT', 'DOMISTREETCLOSURE'
]

# Pull DOMI permits, keep only the relevant fields, and upload to GCS
# partitioned by execution year/month.
domi_permits = get_computronix_odata('DOMIPERMIT', expand_fields=EXPAND_FIELDS)
trimmed_permits = filter_fields(domi_permits, RELEVANT_FIELDS)

json_to_gcs('domi_permits/{}/{}/{}_domi_permits.json'.format(
    args['execution_date'].split('-')[0],
    args['execution_date'].split('-')[1],
    args['execution_date']),
    trimmed_permits, bucket)