def import_cell_tower_data_raw(self):
    """Recreate the raw cell tower table and load the configured files.

    Drops ``<provider_prefix>_cell_tower_data_raw`` if it exists, creates it
    again from ``cdr_and_mapping/create_raw_mapping.sql`` and then runs one
    ``load data local inpath`` statement per entry of
    ``config.input_cell_tower_files``.  The first file is loaded with
    ``overwrite``; every following file is appended.  If no file is
    configured, a hint is printed and the method returns right after the
    table has been created.
    """
    settings = self.config
    prefix = settings.provider_prefix
    raw_columns = self.data.arg_cell_raw
    delimiter = settings.input_cell_tower_delimiter
    has_header = settings.input_cell_tower_have_header
    source_files = settings.input_cell_tower_files
    data_dir = settings.hadoop_data_path
    cursor = self.hc.cursor
    raw_table = '{}_cell_tower_data_raw'.format(prefix)

    def elapsed(since):
        return format_two_point_time(since, time.time())

    print('########## IMPORT RAW MAPPING TABLE ##########')

    # Step 1: drop any previous version of the table.
    print('Checking and dropping raw mapping table if existing.')
    started = time.time()
    cursor.execute('DROP TABLE IF EXISTS ' + raw_table)
    print('Checked and dropped raw mapping table if existing. '
          'Elapsed time: {} seconds'.format(elapsed(started)))

    # Step 2: create the table from the SQL template.
    started = time.time()
    print('Creating raw mapping table')
    template = sql_to_string('cdr_and_mapping/create_raw_mapping.sql')
    cursor.execute(
        template.format(
            provider_prefix=prefix,
            arg_raw=', '.join(raw_columns),
            field_delimiter=delimiter,
            have_header=has_header,
        )
    )
    print('Created raw mapping table. Elapsed time: {} seconds'.format(
        elapsed(started)))

    # Step 3: load the data files.
    started = time.time()
    if len(source_files) < 1:
        print('Please check the input_cell_tower_files field in config.json '
              'and make sure the file is valid.')
        return

    for position, file_name in enumerate(source_files):
        # Only the first file replaces the table contents.
        if position == 0:
            mode = 'overwrite into table '
        else:
            mode = 'into table '
        source = "load data local inpath '{}{}' ".format(data_dir, file_name)
        cursor.execute(source + mode + raw_table)

    print('Imported to raw mapping table. Elapsed time: {} seconds'.format(
        elapsed(started)))
    print('########## FINISHED IMPORTING TO RAW MAPPING TABLE ##########')
def main():
    """Command line entry point for the interpolation run.

    Reads the configuration file given with ``-c/--config``, opens the Hive
    connection, runs the interpolation init commands and then calculates the
    interpolation.  The overall elapsed time is printed at the end.
    """
    began = time.time()

    # Command line: a single option naming the configuration file.
    cli = argparse.ArgumentParser(
        description='Argument indicating the configuration file')
    cli.add_argument(
        "-c",
        "--config",
        action="store",
        help="add a configuration file you would like to process the cdr data"
             " \n ex. py py_hive_connect.py -c config.json",
    )
    options = cli.parse_args()

    config = Config(options.config)
    # Kept bound to a local name, exactly as before.
    hc = HiveConnection(host=config.host, port=config.port, user=config.user)

    # Mandatory Hive initialisation before any table work.
    creator = HiveTableCreator(config)
    creator.initialize(
        'hive_init_commands/initial_hive_commands_interpolation.json')

    # Run the interpolation itself.
    interpolation = Interpolation(config)
    interpolation.calculate_interpolation()

    total = format_two_point_time(began, time.time())
    print('Overall time elapsed: {} seconds'.format(total))
def initialize(self, init_cmd_file):
    """Run the Hive init commands and make sure the output folders exist.

    Every entry under ``hive_commands`` in ``init_cmd_file`` is executed on
    the Hive cursor.  Before execution at most one substitution is applied
    to a command: ``db_name`` for commands starting with ``use``, otherwise
    the first of ``{poi_location}``, ``{osm_location}`` or
    ``{voronoi_location}`` found in the text.  Afterwards the report and
    graph output directories are created when they are missing.

    :param init_cmd_file: path of the JSON file holding the init commands.
    """
    print('########## Initilizing Hive ##########')
    began = time.time()
    report_dir = self.config.output_report_location
    graph_dir = self.config.output_graph_location
    cursor = self.hc.cursor

    def render(text):
        # Config values are looked up lazily, only for the branch that hits.
        if text.startswith('use'):
            return text.format(db_name=self.config.db_name)
        if '{poi_location}' in text:
            return text.format(
                poi_location=self.config.interpolation_poi_file_location)
        if '{osm_location}' in text:
            return text.format(
                osm_location=self.config.interpolation_osm_file_location)
        if '{voronoi_location}' in text:
            return text.format(
                voronoi_location=self.config.
                interpolation_voronoi_file_location)
        return text

    for raw_command in json_file_to_object(init_cmd_file)['hive_commands']:
        cursor.execute(render(raw_command))

    for directory in (report_dir, graph_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    print('########## Done. Time elapsed: {} seconds ##########'.format(
        format_two_point_time(began, time.time())))
def calculate_indicator(cursor, package, provider_prefix, table_name,
                        cdr_data_table, timer):
    """Run the SQL script that builds one indicator table.

    Loads ``<package>/<table_name>.sql``, fills in ``provider_prefix`` and
    ``cdr_data_table`` and executes the result on ``cursor``.

    :param cursor: cursor used to execute the query.
    :param package: folder of the SQL script.
    :param provider_prefix: prefix substituted into the script and messages.
    :param table_name: script name (without ``.sql``) and table suffix.
    :param cdr_data_table: value substituted for ``cdr_data_table``.
    :param timer: start time used for the elapsed-time message.
    """
    full_name = '{}_{}'.format(provider_prefix, table_name)
    print('Calculating {} table'.format(full_name))

    script_path = '{}/{}.sql'.format(package, table_name)
    statement = sql_to_string(script_path).format(
        provider_prefix=provider_prefix,
        cdr_data_table=cdr_data_table,
    )
    cursor.execute(statement)

    took = format_two_point_time(timer, time.time())
    print('Created {} table. Elapsed time: {} seconds'.format(
        full_name, took))
def create_od_sum(self):
    """Rebuild the OD sum table and run the OD export script.

    Drops, creates and fills
    ``<provider_prefix>_la_cdr_all_with_ant_zone_by_uid_od_sum`` using the
    scripts in ``origin_destination/`` and finally executes
    ``origin_destination/od_to_csv.sql``.
    """
    prefix = self.config.provider_prefix
    cursor = self.hc.cursor
    table = '{}_la_cdr_all_with_ant_zone_by_uid_od_sum'.format(prefix)

    def run_script(path):
        # Every script of this step only needs the provider prefix.
        cursor.execute(sql_to_string(path).format(provider_prefix=prefix))

    def elapsed(since):
        return format_two_point_time(since, time.time())

    print('########## CREATING OD SUM TABLE ##########')

    started = time.time()
    print('Checking and dropping {} table if existing.'.format(table))
    # The trailing blank belongs to the statement as originally issued.
    cursor.execute('DROP TABLE IF EXISTS {} '.format(table))
    print('Checked and dropped {} table if existing. '
          'Elapsed time: {} seconds'.format(table, elapsed(started)))

    print('Creating {} table'.format(table))
    run_script(
        'origin_destination/create_la_cdr_all_with_ant_zone_by_uid_od_sum.sql')
    print('Created {} table'.format(table))

    started = time.time()
    print('Inserting into {} table'.format(table))
    run_script(
        'origin_destination/insert_la_cdr_all_with_ant_zone_by_uid_od_sum.sql')
    print('Inserted into {} table. '
          'Elapsed time: {} seconds'.format(table, elapsed(started)))

    run_script('origin_destination/od_to_csv.sql')
    print('OD Result is stored in /tmp/hive/od_result')
    print('########## FINISHED CREATING OD SUM TABLE ##########')
def create_route_interpolation(self):
    """Rebuild the route interpolation table.

    Drops, creates and fills
    ``<provider_prefix>_cdr_by_uid_trip_routing_array_apd``.  The insert
    script additionally receives ``config.max_size_interpolation`` and the
    last path component of the configured OSM and voronoi file locations.
    """
    prefix = self.config.provider_prefix
    cursor = self.hc.cursor
    table = '{}_cdr_by_uid_trip_routing_array_apd'.format(prefix)

    def elapsed(since):
        return format_two_point_time(since, time.time())

    print('########## CREATE ROUTE INTERPOLATION TABLE ##########')

    started = time.time()
    print('Checking and dropping {} table if existing.'.format(table))
    cursor.execute('DROP TABLE IF EXISTS {}'.format(table))
    print('Checked and dropped {} table if existing. '
          'Elapsed time: {} seconds'.format(table, elapsed(started)))

    started = time.time()
    print('Creating {} table'.format(table))
    create_template = sql_to_string(
        'interpolation/create_route_interpolation.sql')
    cursor.execute(create_template.format(provider_prefix=prefix))
    print('Created {} table. Elapsed time: {} seconds'.format(
        table, elapsed(started)))

    started = time.time()
    insert_template = sql_to_string(
        'interpolation/insert_route_interpolation.sql')
    print('Inserting into {} table'.format(table))
    # Only the file names (text after the last '/') go into the query.
    osm_name = self.config.interpolation_osm_file_location.rsplit('/', 1)[-1]
    voronoi_name = self.config.interpolation_voronoi_file_location.rsplit(
        '/', 1)[-1]
    insert_statement = insert_template.format(
        provider_prefix=prefix,
        max_size_interpolation=self.config.max_size_interpolation,
        osm=osm_name,
        voronoi=voronoi_name,
    )
    cursor.execute(insert_statement)
    print('Inserted into {} table. Elapsed time: {} seconds'.format(
        table, elapsed(started)))
    print('########## FINISHED ROUTE INTERPOLATION TABLE ##########')
def create_od(self):
    """Rebuild the origin-destination (OD) table.

    Drops ``<provider_prefix>_la_cdr_all_with_ant_zone_by_uid_od`` if it
    exists, creates it from
    ``origin_destination/create_la_cdr_all_with_ant_zone_by_uid_od.sql`` and
    fills it with
    ``origin_destination/insert_la_cdr_all_with_ant_zone_by_uid_od.sql``.
    The insert script receives ``config.od_admin_unit`` as ``target_unit``.
    Progress and elapsed times are printed for every step.
    """
    provider_prefix = self.config.provider_prefix
    od_admin_unit = self.config.od_admin_unit
    cursor = self.hc.cursor
    table = '{provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od'.format(
        provider_prefix=provider_prefix)

    print('########## CREATE OD TABLE ##########')

    timer = time.time()
    print('Checking and dropping {table} table if existing.'.format(
        table=table))
    cursor.execute('DROP TABLE IF EXISTS {table}'.format(table=table))
    # The message now names the table that was actually dropped; it used to
    # report the "..._by_uid" table, without the "_od" suffix.
    print('Checked and dropped {table} table if existing.'
          ' Elapsed time: {time} seconds'.format(
              table=table,
              time=format_two_point_time(timer, time.time())))

    print('Creating {table} table'.format(table=table))
    timer = time.time()
    raw_sql = sql_to_string(
        'origin_destination/create_la_cdr_all_with_ant_zone_by_uid_od.sql')
    query = raw_sql.format(provider_prefix=provider_prefix)
    cursor.execute(query)
    # "seconds" was missing here, unlike in all other elapsed-time messages.
    print('Created {table} table. Elapsed time: {time} seconds'.format(
        table=table, time=format_two_point_time(timer, time.time())))

    timer = time.time()
    print('Inserting into {table} table'.format(table=table))
    raw_sql = sql_to_string(
        'origin_destination/insert_la_cdr_all_with_ant_zone_by_uid_od.sql')
    query = raw_sql.format(provider_prefix=provider_prefix,
                           target_unit=od_admin_unit)
    cursor.execute(query)
    print('Inserted into {table} table. Elapsed time: {time} seconds'.format(
        table=table, time=format_two_point_time(timer, time.time())))
    print('########## FINISHED CREATING OD TABLE ##########')
def preprocess_cell_tower_data(self):
    """Rebuild the preprocessed cell tower (mapping) table.

    Drops, creates and fills ``<provider_prefix>_cell_tower_data_preprocess``
    with the ``cdr_and_mapping`` scripts.  The insert script is given the
    keyword ``distinct`` when ``config.check_duplicate`` is truthy and an
    empty string otherwise.
    """
    prefix = self.config.provider_prefix
    drop_duplicates = self.config.check_duplicate
    create_columns = self.data.arg_cell_create
    mapped_columns = self.data.arg_cell_map
    cursor = self.hc.cursor

    def elapsed(since):
        return format_two_point_time(since, time.time())

    print('########## CREATE PREPROCESS MAPPING TABLE ##########')
    distinct_keyword = 'distinct' if drop_duplicates else ''

    print('Checking and dropping preprocess mapping table if existing.')
    started = time.time()
    cursor.execute(
        'DROP TABLE IF EXISTS {}_cell_tower_data_preprocess'.format(prefix))
    print('Checked and dropped preprocess mapping table if existing. '
          'Elapsed time: {} seconds'.format(elapsed(started)))

    started = time.time()
    print('Creating preprocess mapping table')
    create_template = sql_to_string(
        'cdr_and_mapping/create_preprocess_mapping.sql')
    cursor.execute(
        create_template.format(
            provider_prefix=prefix,
            arg_create=', '.join(create_columns),
        )
    )
    print('Created mapping preprocess table. Elapsed time: {} seconds'.format(
        elapsed(started)))

    started = time.time()
    print('Inserting into preprocess mapping table')
    insert_template = sql_to_string(
        'cdr_and_mapping/insert_preprocess_mapping.sql')
    cursor.execute(
        insert_template.format(
            provider_prefix=prefix,
            distinct=distinct_keyword,
            arg=', '.join(mapped_columns),
        )
    )
    print('Inserted into preprocess mapping table. '
          'Elapsed time: {} seconds'.format(elapsed(started)))
    print('########## FINISHED CREATING PREPROCESS MAPPING TABLE ##########')
def cell_tower_data_admin(self, admin):
    """Rebuild the cell tower mapping table for one admin unit.

    Drops, creates and fills ``<provider_prefix>_cell_tower_data_<admin>``.
    When ``config.check_invalid_lat_lng`` is truthy the insert script gets
    an extra condition excluding rows at (0, 0) or with NULL coordinates;
    otherwise that placeholder is left empty.

    :param admin: admin unit name used in the table name and the scripts.
    """
    prefix = self.config.provider_prefix
    filter_coordinates = self.config.check_invalid_lat_lng
    cursor = self.hc.cursor

    def elapsed(since):
        return format_two_point_time(since, time.time())

    print('########## CREATE MAPPING ADMIN TABLE ##########')
    coordinate_filter = ''
    if filter_coordinates:
        coordinate_filter = (
            'and (latitude != 0 or longitude != 0) '
            'and latitude is not NULL and longitude is not NULL'
        )

    print('Checking and dropping mapping {} table if existing.'.format(admin))
    started = time.time()
    cursor.execute('DROP TABLE IF EXISTS {}_cell_tower_data_{}'.format(
        prefix, admin))
    print('Check and drop mapping {} table if existing. '
          'Elapsed time: {} seconds'.format(admin, elapsed(started)))

    started = time.time()
    print('Creating mapping {} table'.format(admin))
    create_template = sql_to_string('cdr_and_mapping/create_mapping_admin.sql')
    cursor.execute(
        create_template.format(provider_prefix=prefix, admin=admin))
    print('Created mapping {} table. Elapsed time: {} seconds'.format(
        admin, elapsed(started)))

    started = time.time()
    print('Inserting into mapping {} table'.format(admin))
    insert_template = sql_to_string('cdr_and_mapping/insert_mapping_admin.sql')
    cursor.execute(
        insert_template.format(
            provider_prefix=prefix,
            admin=admin,
            check_lat_lng=coordinate_filter,
        )
    )
    print('Inserted into mapping {} table. Elapsed time: {} seconds'.format(
        admin, elapsed(started)))
    print('########## FINISHED CREATING MAPPING ADMIN TABLE ##########')
def preprocess_data(self):
    """Rebuild the preprocessed CDR table.

    Drops, creates and fills ``<provider_prefix>_preprocess`` with the
    ``cdr_and_mapping`` scripts.  The insert script is given the keyword
    ``distinct`` when ``config.check_duplicate`` is truthy and an empty
    string otherwise; the mapped column list is printed before inserting.
    """
    prefix = self.config.provider_prefix
    drop_duplicates = self.config.check_duplicate
    prep_columns = self.data.arg_cdr_prep
    mapped_columns = self.data.arg_cdr_map
    cursor = self.hc.cursor

    def elapsed(since):
        return format_two_point_time(since, time.time())

    print('########## CREATE PREPROCESS CDR TABLE ##########')
    distinct_keyword = 'distinct' if drop_duplicates else ''

    print('Checking and dropping preprocess cdr table if existing.')
    started = time.time()
    cursor.execute('DROP TABLE IF EXISTS {}_preprocess'.format(prefix))
    print('Checked and dropped preprocess cdr table if existing. '
          'Elapsed time: {} seconds'.format(elapsed(started)))

    started = time.time()
    print('Creating preprocess cdr table.')
    create_template = sql_to_string(
        'cdr_and_mapping/create_preprocess_cdr.sql')
    cursor.execute(
        create_template.format(
            args=', '.join(prep_columns),
            provider_prefix=prefix,
        )
    )
    print('Created preprocess cdr table. Elapsed time: {} seconds'.format(
        elapsed(started)))

    started = time.time()
    print('Inserting into preprocess table')
    print('Columns in preprocess table mapped: ' + ', '.join(mapped_columns))
    insert_template = sql_to_string(
        'cdr_and_mapping/insert_preprocess_cdr.sql')
    cursor.execute(
        insert_template.format(
            distinct=distinct_keyword,
            arg=', '.join(mapped_columns),
            provider_prefix=prefix,
        )
    )
    print('Inserted into preprocess cdr table. '
          'Elapsed time: {} seconds'.format(elapsed(started)))
    print('########## FINISHED CREATING PREPROCESS CDR TABLE ##########')
def create_trip_format(self):
    """Rebuild the per-user trip format table.

    Drops, creates and fills ``<provider_prefix>_cdr_by_uid_trip`` using
    ``interpolation/create_trip_format.sql`` and
    ``interpolation/insert_trip_format.sql``.
    """
    prefix = self.config.provider_prefix
    cursor = self.hc.cursor
    table = '{}_cdr_by_uid_trip'.format(prefix)

    def elapsed(since):
        return format_two_point_time(since, time.time())

    print('########## CREATE CDR BY UID ARRAY TRIP FORMAT TABLE ##########')

    started = time.time()
    print('Checking and dropping {} table if existing.'.format(table))
    cursor.execute('DROP TABLE IF EXISTS {}'.format(table))
    print('Checked and dropped {} table if existing. '
          'Elapsed time: {} seconds'.format(table, elapsed(started)))

    started = time.time()
    print('Creating {} table'.format(table))
    create_template = sql_to_string('interpolation/create_trip_format.sql')
    cursor.execute(create_template.format(provider_prefix=prefix))
    print('Created {} table. Elapsed time: {} seconds'.format(
        table, elapsed(started)))

    started = time.time()
    # The script is read before the progress message, as it always was.
    insert_template = sql_to_string('interpolation/insert_trip_format.sql')
    print('Inserting into {} table'.format(table))
    cursor.execute(insert_template.format(provider_prefix=prefix))
    print('Inserted into {} table. Elapsed time: {} seconds'.format(
        table, elapsed(started)))
    print(
        '########## FINISHED CREATING CDR BY UID TRIP FORMAT TABLE ##########')
def main():
    """Command line entry point for the World Bank indicator run.

    Reads the configuration file given with ``-c/--config``, opens the Hive
    connection, runs the indicator init commands, calculates every indicator
    and then exports them.  The overall elapsed time is printed at the end.

    The calculation order matters: 09 needs 06_11, 10 needs 09, 05 needs 10
    and 07_08 needs both 06_11 and 10.
    """
    # argument parser
    start = time.time()
    parser = argparse.ArgumentParser(
        description='Argument indicating the configuration file')

    # add configuration argument
    parser.add_argument(
        "-c",
        "--config",
        help="add a configuration file you would like to process the cdr data"
        " \n ex. py run_wb_indicators.py -c config.json",
        action="store")

    # parse config to args.config
    args = parser.parse_args()

    config = Config(args.config)
    # Plain attribute access, like the other entry points, instead of
    # reaching into config.__dict__.
    HiveConnection(host=config.host, port=config.port, user=config.user)

    table_creator = HiveTableCreator(config)
    table_creator.initialize(
        'hive_init_commands/initial_hive_commands_wb_indicators.json'
    )  # mandatory (init hive)

    # init indicators generators
    ig = WBIndicators(config)

    # user section here
    # run command
    ig.calculate_indicator_01_02_03()
    ig.calculate_indicator_06_11()
    ig.calculate_indicator_04()
    ig.calculate_indicator_09()  # need to run after "calculate_indicator_06_11"
    ig.calculate_indicator_10()  # need to run after "calculate_indicator_09"
    ig.calculate_indicator_05()  # need to run after "calculate_indicator_10"
    # need to run after "calculate_indicator_06_11,calculate_indicator_10"
    ig.calculate_indicator_07_08()

    # export output
    ig.export_indicator_01_02_03()
    ig.export_indicator_06_11()
    ig.export_indicator_04()
    ig.export_indicator_09()
    ig.export_indicator_10()
    ig.export_indicator_05()
    ig.export_indicator_07_08()

    print('Overall time elapsed: {} seconds'.format(
        format_two_point_time(start, time.time())))
def create_trip_24hr_padding(self):
    """Rebuild the 24-hour padded trip table.

    Drops, creates and fills
    ``<provider_prefix>_cdr_by_uid_trip_organized_array_apd`` using
    ``interpolation/create_trip_24_hr_padding.sql`` and
    ``interpolation/insert_trip_24_hr_padding.sql``.
    """
    prefix = self.config.provider_prefix
    cursor = self.hc.cursor
    table = '{}_cdr_by_uid_trip_organized_array_apd'.format(prefix)

    def elapsed(since):
        return format_two_point_time(since, time.time())

    print('########## CREATE TRIP 24 HR PADDING TABLE ##########')

    started = time.time()
    print('Checking and dropping {} table if existing.'.format(table))
    cursor.execute('DROP TABLE IF EXISTS {}'.format(table))
    print('Checked and dropped {} table if existing. '
          'Elapsed time: {} seconds'.format(table, elapsed(started)))

    started = time.time()
    print('Creating {} table'.format(table))
    create_template = sql_to_string(
        'interpolation/create_trip_24_hr_padding.sql')
    cursor.execute(create_template.format(provider_prefix=prefix))
    print('Created {} table. Elapsed time: {} seconds'.format(
        table, elapsed(started)))

    started = time.time()
    # The script is read before the progress message, as it always was.
    insert_template = sql_to_string(
        'interpolation/insert_trip_24_hr_padding.sql')
    print('Inserting into {} table'.format(table))
    cursor.execute(insert_template.format(provider_prefix=prefix))
    print('Inserted into {} table. Elapsed time: {} seconds'.format(
        table, elapsed(started)))
    print('########## FINISHED TRIP 24 HR PADDING TABLE ##########')
def export_to_csv(self):
    """Run the script that exports the route interpolation result.

    Executes ``interpolation/export_to_gps_format.sql`` with the provider
    prefix filled in and prints the elapsed time.  The closing message
    names ``/tmp/hive/cdr_interpolation`` as the output location.
    """
    prefix = self.config.provider_prefix
    cursor = self.hc.cursor

    print('########## Exporting route interpolation to CSV ##########')
    started = time.time()

    template = sql_to_string('interpolation/export_to_gps_format.sql')
    cursor.execute(template.format(provider_prefix=prefix))

    took = format_two_point_time(started, time.time())
    print('Exported to CSV. Elapsed time: {} seconds'.format(took))
    print('########## FINISHED EXPORTING, FILE LOCATED IN '
          '/tmp/hive/cdr_interpolation ##########')
def create_tables(self):
    """Build every table of the CDR/mapping pipeline, in order.

    Cell tower data is imported and preprocessed first, then one mapping
    table is built per admin unit returned by
    ``get_admin_units_from_mapping(config.cdr_cell_tower)``, and finally the
    CDR data is imported, preprocessed and consolidated.
    """
    print('########## Creating Tables ##########')
    began = time.time()

    # Cell tower (mapping) side.
    self.import_cell_tower_data_raw()
    self.preprocess_cell_tower_data()
    for admin_unit in get_admin_units_from_mapping(self.config.cdr_cell_tower):
        self.cell_tower_data_admin(admin_unit)

    # CDR side.
    self.import_raw()
    self.preprocess_data()
    self.consolidate_table()

    took = format_two_point_time(began, time.time())
    print('########## Done create all tables. '
          'Time elapsed: {} seconds ##########'.format(took))
def main():
    """Command line entry point for the statistics run.

    Reads the configuration file given with ``-c/--config``, opens the Hive
    connection, runs the statistics init commands and then produces the
    reports, graphs and frequent-location tables.  The overall elapsed time
    is printed at the end.
    """
    began = time.time()

    # Command line: a single option naming the configuration file.
    cli = argparse.ArgumentParser(
        description='Argument indicating the configuration file')
    cli.add_argument(
        "-c",
        "--config",
        action="store",
        help="add a configuration file you would like to process the cdr data"
             " \n ex. py py_hive_connect.py -c config.json",
    )
    options = cli.parse_args()

    config = Config(options.config)
    HiveConnection(host=config.host, port=config.port, user=config.user)

    # Mandatory Hive initialisation.
    creator = HiveTableCreator(config)
    creator.initialize('hive_init_commands/initial_hive_commands_stats.json')

    stats = Statistics(config)

    # --- user section: comment out the steps you do not need ---
    # Reports
    stats.calculate_data_statistics()
    stats.calculate_daily_statistics()
    stats.calculate_monthly_statistics()
    stats.calculate_zone_population()
    stats.calculate_summary()
    stats.calculate_user_date_histogram()

    # Graphs
    stats.daily_cdrs()
    stats.daily_unique_users()
    stats.daily_unique_locations()
    stats.daily_average_cdrs()
    stats.daily_unique_average_locations()

    # Frequent locations (report)
    stats.frequent_locations()
    stats.frequent_locations_night()

    # Prerequisite for Origin-Destination; comment it out when OD is not
    # wanted.  Requires frequent_locations() above.
    stats.rank1_frequent_locations()

    total = format_two_point_time(began, time.time())
    print('Overall time elapsed: {} seconds'.format(total))
plt.axvspan(690, 730, facecolor='darkred', alpha=0.2) plt.axvspan(760, 850, facecolor='gray', alpha=0.1) plt.savefig(os.path.join(output_path_graph, plot_sample_name)+"_min_max_avg.jpg") if (output_csv): output_file_name = os.path.join(output_path_csv, plot_sample_name)+".csv" print('export_to_csv: {}'.format(output_file_name)) if os.path.exists(output_file_name): os.remove(output_file_name) data_t = c.data.transpose() data_t.insert(0, 'SAMPLE_CODE', plot_sample_name) data_t.index.name = 'ASD_FILE' data_t.to_csv(output_file_name, sep=',', encoding='utf-8') line_count += 1 print(f'Processed {line_count} lines.') # %% print('Overall time elapsed: {} seconds'.format(format_two_point_time(start, time.time()))) # %%
def consolidate_table(self):
    """Rebuild the consolidated CDR table.

    First queries ``max(latitude), max(longitude)`` from
    ``<provider_prefix>_preprocess``.  When both maxima equal ``-1`` the
    join variant of the insert script is used and every consolidate column
    is qualified with a table alias: ``a2`` for ``latitude``/``longitude``
    (compared case-insensitively), ``a1`` for all other columns.  Otherwise
    the plain insert script is used with the columns unchanged.

    Then drops, creates and fills ``<provider_prefix>_consolidate_data_all``.
    """
    # TODO join here (note kept from the original code)
    provider_prefix = self.config.provider_prefix
    arg_cdr_prep = self.data.arg_cdr_prep
    arg_cdr_con = self.data.arg_cdr_con
    cursor = self.hc.cursor

    print('########## CREATE CONSOLIDATE CDR TABLE ##########')

    print('Checking latitude and longitude in the preprocess table')
    cursor.execute(
        'select max(latitude), max(longitude) from {provider_prefix}_preprocess'
        .format(provider_prefix=provider_prefix))
    res = cursor.fetchall()
    latitude = res[0][0]
    longitude = res[0][1]

    if latitude == -1 and longitude == -1:
        print('Join to make consolidate')
        # Coordinates come from alias a2, everything else from alias a1.
        arg_cdr_con_with_join_cond = [
            ('a2.' if arg.lower() in ('longitude', 'latitude') else 'a1.')
            + arg + ' as ' + arg
            for arg in arg_cdr_con
        ]
        insert_script_loc = 'cdr_and_mapping/insert_consolidate_cdr_join.sql'
    else:
        arg_cdr_con_with_join_cond = arg_cdr_con
        print('No join')
        insert_script_loc = 'cdr_and_mapping/insert_consolidate_cdr.sql'

    # Announce the drop right before it happens and report the table that is
    # really dropped; the old message said "preprocess cdr table".
    print('Checking and dropping consolidate cdr table if existing.')
    timer = time.time()
    cursor.execute(
        'DROP TABLE IF EXISTS {provider_prefix}_consolidate_data_all'.format(
            provider_prefix=provider_prefix))
    print(
        'Checked and dropped consolidate cdr table if existing. '
        'Elapsed time: {} seconds'.format(
            format_two_point_time(timer, time.time())))

    timer = time.time()
    print('Creating consolidate table')
    raw_sql = sql_to_string('cdr_and_mapping/create_consolidate_cdr.sql')
    query = raw_sql.format(provider_prefix=provider_prefix,
                           arg_prep=' ,'.join(arg_cdr_prep))
    cursor.execute(query)
    print('Created consolidate cdr table. Elapsed time: {} seconds'.format(
        format_two_point_time(timer, time.time())))

    timer = time.time()
    print('Columns in consolidate table: ' +
          ', '.join(arg_cdr_con_with_join_cond))
    print('Inserting into the consolidate table')
    raw_sql = sql_to_string(insert_script_loc)
    query = raw_sql.format(provider_prefix=provider_prefix,
                           arg_con=', '.join(arg_cdr_con_with_join_cond))
    cursor.execute(query)
    print('Inserted into consolidate cdr table. Elapsed time: {} seconds'.
          format(format_two_point_time(timer, time.time())))
    print('########## FINISHED CREATING CONSOLIDATE CDR TABLE ##########')
def export_indicator_01_02_03(self):
    """Export indicators 01, 02 and 03 through ``export_indicator``.

    Calls ``export_indicator`` once per (source name, output name) pair
    listed below, passing the Hive cursor, the ``wb_indicators`` package,
    the provider prefix and ``config.output_data_path``.

    The admin2 hourly export used to pass ``indicator03_admin0_date`` as its
    source name, so the ``indicator01_02_admin2_hour`` output was produced
    from the wrong indicator.  It now uses ``indicator01_admin2_hour``, in
    line with the admin3 entry.
    NOTE(review): this assumes the fourth ``export_indicator`` argument
    selects the indicator table / export script -- confirm against
    ``export_indicator``.
    """
    provider_prefix = self.config.provider_prefix
    output_data_path = self.config.output_data_path
    package = 'wb_indicators'
    cursor = self.hc.cursor

    # (source name, output file name) pairs, exported in this order.
    exports = (
        ('indicator01_admin2_hour', 'indicator01_02_admin2_hour'),
        ('indicator01_admin3_hour', 'indicator01_02_admin3_hour'),
        ('indicator03_admin0_date', 'indicator03_admin0_date'),
        ('indicator03_admin1_date', 'indicator03_admin1_date'),
        ('indicator03_admin2_date', 'indicator03_admin2_date'),
        ('indicator03_admin3_date', 'indicator03_admin3_date'),
    )

    print('########## export indicator_1_2_3 ##########')
    timer = time.time()
    for source_name, output_name in exports:
        # Each export gets its own start time for its elapsed-time report.
        export_indicator(cursor, package, provider_prefix, source_name,
                         output_data_path, output_name, time.time())
    print('########## FINISHED export indicator_1_2_3 ########## '
          'Elapsed time: {time} seconds'.format(
              time=format_two_point_time(timer, time.time())))
def export_indicator_06_11(self):
    """Export indicators 06 and 11 through ``export_indicator``.

    Each indicator name below is passed to ``export_indicator`` both as
    source name and as output name, together with the Hive cursor, the
    ``wb_indicators`` package, the provider prefix and
    ``config.output_data_path``.
    """
    prefix = self.config.provider_prefix
    target_dir = self.config.output_data_path
    package = 'wb_indicators'
    cursor = self.hc.cursor

    indicator_names = (
        'indicator06_admin3_week',
        'indicator06_admin2_week',
        'indicator11_admin3_month',
        'indicator11_admin2_month',
    )

    print('########## export indicator_06_11 ##########')
    began = time.time()
    for name in indicator_names:
        # Each export gets its own start time.
        export_indicator(cursor, package, prefix, name, target_dir, name,
                         time.time())
    took = format_two_point_time(began, time.time())
    print('########## FINISHED export indicator_06_11 ########## '
          'Elapsed time: {} seconds'.format(took))
def execute_multiple(cursor, package, sql_filename, sql_params, timer):
    """Execute every statement of a multi-statement SQL script.

    Loads ``<package>/<sql_filename>.sql``, fills it with ``sql_params`` and
    splits the result on ``;``.  Each piece that is not blank is printed and
    executed on ``cursor`` in order.

    :param cursor: cursor used to execute the statements.
    :param package: folder of the SQL script.
    :param sql_filename: script name without the ``.sql`` extension.
    :param sql_params: mapping of placeholder names to values.
    :param timer: start time used for the elapsed-time message.
    """
    print('Execute multiple queries...')
    script_path = '{}/{}.sql'.format(package, sql_filename)
    script = sql_to_string(script_path).format(**sql_params)

    for statement in script.split(";"):
        # Skip the empty pieces left around the separators.
        if not statement.strip():
            continue
        print('Execute {}'.format(statement))
        cursor.execute(statement)

    took = format_two_point_time(timer, time.time())
    print('Finised Execute multiple queries. '
          'Elapsed time: {} seconds'.format(took))