def calculate_indicator(cursor,package,provider_prefix,table_name,cdr_data_table,timer): print('Calculating {provider_prefix}_{table_name} table'.format(provider_prefix=provider_prefix,table_name=table_name)) raw_sql = sql_to_string('{package}/{table_name}.sql'.format(package=package,table_name=table_name)) query = raw_sql.format(provider_prefix=provider_prefix, cdr_data_table=cdr_data_table) cursor.execute(query) print('Created {provider_prefix}_{table_name} table. Elapsed time: {time} seconds' .format(provider_prefix=provider_prefix,table_name=table_name, time=format_two_point_time(timer, time.time())))
def create_od_sum(self): provider_prefix = self.config.provider_prefix cursor = self.hc.cursor print('########## CREATING OD SUM TABLE ##########') timer = time.time() print( 'Checking and dropping {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od_sum table if existing.' .format(provider_prefix=provider_prefix)) cursor.execute( 'DROP TABLE IF EXISTS {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od_sum ' .format(provider_prefix=provider_prefix)) print( 'Checked and dropped {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od_sum table if existing. ' 'Elapsed time: {time} seconds'.format( provider_prefix=provider_prefix, time=format_two_point_time(timer, time.time()))) print( 'Creating {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od_sum table' .format(provider_prefix=provider_prefix)) raw_sql = sql_to_string( 'origin_destination/create_la_cdr_all_with_ant_zone_by_uid_od_sum.sql' ) query = raw_sql.format(provider_prefix=provider_prefix) cursor.execute(query) print( 'Created {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od_sum table' .format(provider_prefix=provider_prefix)) timer = time.time() print( 'Inserting into {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od_sum table' .format(provider_prefix=provider_prefix)) raw_sql = sql_to_string( 'origin_destination/insert_la_cdr_all_with_ant_zone_by_uid_od_sum.sql' ) query = raw_sql.format(provider_prefix=provider_prefix) cursor.execute(query) print( 'Inserted into {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od_sum table. ' 'Elapsed time: {time} seconds'.format( provider_prefix=provider_prefix, time=format_two_point_time(timer, time.time()))) raw_sql = sql_to_string('origin_destination/od_to_csv.sql') query = raw_sql.format(provider_prefix=provider_prefix) cursor.execute(query) print('OD Result is stored in /tmp/hive/od_result') print('########## FINISHED CREATING OD SUM TABLE ##########')
def import_cell_tower_data_raw(self): provider_prefix = self.config.provider_prefix arg_cell_raw = self.data.arg_cell_raw input_cell_tower_delimiter = self.config.input_cell_tower_delimiter input_cell_tower_have_header = self.config.input_cell_tower_have_header input_cell_tower_files = self.config.input_cell_tower_files hadoop_data_path = self.config.hadoop_data_path cursor = self.hc.cursor print('########## IMPORT RAW MAPPING TABLE ##########') print('Checking and dropping raw mapping table if existing.') timer = time.time() cursor.execute( 'DROP TABLE IF EXISTS {provider_prefix}_cell_tower_data_raw'. format(provider_prefix=provider_prefix)) print( 'Checked and dropped raw mapping table if existing. Elapsed time: {} seconds' .format(format_two_point_time(timer, time.time()))) timer = time.time() print('Creating raw mapping table') raw_query = sql_to_string('cdr_and_mapping/create_raw_mapping.sql') query = raw_query.format(provider_prefix=provider_prefix, arg_raw=', '.join(arg_cell_raw), field_delimiter=input_cell_tower_delimiter, have_header=input_cell_tower_have_header) cursor.execute(query) print('Created raw mapping table. Elapsed time: {} seconds'.format( format_two_point_time(timer, time.time()))) timer = time.time() if len(input_cell_tower_files) < 1: print( 'Please check the input_cell_tower_files field in config.json and make sure the file is valid.' ) return elif len(input_cell_tower_files) == 1: cursor.execute( "load data local inpath '{hadoop_data_path}{hadoop_data_file}' " .format(hadoop_data_path=hadoop_data_path, hadoop_data_file=input_cell_tower_files[0]) + "overwrite into table {provider_prefix}_cell_tower_data_raw". format(provider_prefix=provider_prefix)) else: cursor.execute( "load data local inpath '{hadoop_data_path}{hadoop_data_file}' " .format(hadoop_data_path=hadoop_data_path, hadoop_data_file=input_cell_tower_files[0]) + "overwrite into table {provider_prefix}_cell_tower_data_raw". format(provider_prefix=provider_prefix)) for i in range(1, len(input_cell_tower_files)): cursor.execute( "load data local inpath '{hadoop_data_path}{hadoop_data_file}' " .format(hadoop_data_path=hadoop_data_path, hadoop_data_file=input_cell_tower_files[i]) + "into table {provider_prefix}_cell_tower_data_raw".format( provider_prefix=provider_prefix)) print('Imported to raw mapping table. Elapsed time: {} seconds'.format( format_two_point_time(timer, time.time()))) print('########## FINISHED IMPORTING TO RAW MAPPING TABLE ##########')
def create_od(self): provider_prefix = self.config.provider_prefix od_admin_unit = self.config.od_admin_unit cursor = self.hc.cursor print('########## CREATE OD TABLE ##########') timer = time.time() print( 'Checking and dropping {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od table if existing.' .format(provider_prefix=provider_prefix)) cursor.execute( 'DROP TABLE IF EXISTS {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od' .format(provider_prefix=provider_prefix)) print( 'Checked and dropped {provider_prefix}_la_cdr_all_with_ant_zone_by_uid table if existing.' ' Elapsed time: {time} seconds'.format( provider_prefix=provider_prefix, time=format_two_point_time(timer, time.time()))) print( 'Creating {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od table' .format(provider_prefix=provider_prefix)) timer = time.time() raw_sql = sql_to_string( 'origin_destination/create_la_cdr_all_with_ant_zone_by_uid_od.sql') query = raw_sql.format(provider_prefix=provider_prefix) cursor.execute(query) print( 'Created {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od table. Elapsed time: {time}' .format(provider_prefix=provider_prefix, time=format_two_point_time(timer, time.time()))) timer = time.time() print( 'Inserting into {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od table' .format(provider_prefix=provider_prefix)) raw_sql = sql_to_string( 'origin_destination/insert_la_cdr_all_with_ant_zone_by_uid_od.sql') query = raw_sql.format(provider_prefix=provider_prefix, target_unit=od_admin_unit) cursor.execute(query) print( 'Inserted into {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od table. Elapsed time: {time} seconds' .format(provider_prefix=provider_prefix, time=format_two_point_time(timer, time.time()))) print('########## FINISHED CREATING OD TABLE ##########')
def create_route_interpolation(self): provider_prefix = self.config.provider_prefix cursor = self.hc.cursor print('########## CREATE ROUTE INTERPOLATION TABLE ##########') timer = time.time() print( 'Checking and dropping {provider_prefix}_cdr_by_uid_trip_routing_array_apd table if existing.' .format(provider_prefix=provider_prefix)) cursor.execute( 'DROP TABLE IF EXISTS {provider_prefix}_cdr_by_uid_trip_routing_array_apd' .format(provider_prefix=provider_prefix)) print( 'Checked and dropped {provider_prefix}_cdr_by_uid_trip_routing_array_apd table if existing. ' 'Elapsed time: {time} seconds'.format( provider_prefix=provider_prefix, time=format_two_point_time(timer, time.time()))) timer = time.time() print( 'Creating {provider_prefix}_cdr_by_uid_trip_routing_array_apd table' .format(provider_prefix=provider_prefix)) raw_sql = sql_to_string('interpolation/create_route_interpolation.sql') query = raw_sql.format(provider_prefix=provider_prefix) cursor.execute(query) print( 'Created {provider_prefix}_cdr_by_uid_trip_routing_array_apd table. Elapsed time: {time} seconds' .format(provider_prefix=provider_prefix, time=format_two_point_time(timer, time.time()))) timer = time.time() raw_sql = sql_to_string('interpolation/insert_route_interpolation.sql') print( 'Inserting into {provider_prefix}_cdr_by_uid_trip_routing_array_apd table' .format(provider_prefix=provider_prefix)) query = raw_sql.format( provider_prefix=provider_prefix, max_size_interpolation=self.config.max_size_interpolation, osm=self.config.interpolation_osm_file_location.split('/')[-1], voronoi=self.config.interpolation_voronoi_file_location.split( '/')[-1]) cursor.execute(query) print( 'Inserted into {provider_prefix}_cdr_by_uid_trip_routing_array_apd table. Elapsed time: {time} seconds' .format(provider_prefix=provider_prefix, time=format_two_point_time(timer, time.time()))) print('########## FINISHED ROUTE INTERPOLATION TABLE ##########')
def preprocess_cell_tower_data(self): provider_prefix = self.config.provider_prefix check_duplicate = self.config.check_duplicate arg_cell_create = self.data.arg_cell_create arg_cell_map = self.data.arg_cell_map cursor = self.hc.cursor print('########## CREATE PREPROCESS MAPPING TABLE ##########') if check_duplicate: distinct = 'distinct' else: distinct = '' print('Checking and dropping preprocess mapping table if existing.') timer = time.time() cursor.execute( 'DROP TABLE IF EXISTS {provider_prefix}_cell_tower_data_preprocess' .format(provider_prefix=provider_prefix)) print( 'Checked and dropped preprocess mapping table if existing. Elapsed time: {} seconds' .format(format_two_point_time(timer, time.time()))) timer = time.time() print('Creating preprocess mapping table') raw_sql = sql_to_string( 'cdr_and_mapping/create_preprocess_mapping.sql') query = raw_sql.format(provider_prefix=provider_prefix, arg_create=', '.join(arg_cell_create)) cursor.execute(query) print('Created mapping preprocess table. Elapsed time: {} seconds'. format(format_two_point_time(timer, time.time()))) timer = time.time() # need username to get privilege print('Inserting into preprocess mapping table') raw_sql = sql_to_string( 'cdr_and_mapping/insert_preprocess_mapping.sql') query = raw_sql.format(provider_prefix=provider_prefix, distinct=distinct, arg=', '.join(arg_cell_map)) cursor.execute(query) print( 'Inserted into preprocess mapping table. Elapsed time: {} seconds'. format(format_two_point_time(timer, time.time()))) print( '########## FINISHED CREATING PREPROCESS MAPPING TABLE ##########')
def preprocess_data(self): provider_prefix = self.config.provider_prefix check_duplicate = self.config.check_duplicate arg_cdr_prep = self.data.arg_cdr_prep arg_cdr_map = self.data.arg_cdr_map cursor = self.hc.cursor print('########## CREATE PREPROCESS CDR TABLE ##########') if check_duplicate: distinct = 'distinct' else: distinct = '' print('Checking and dropping preprocess cdr table if existing.') timer = time.time() cursor.execute( 'DROP TABLE IF EXISTS {provider_prefix}_preprocess'.format( provider_prefix=provider_prefix)) print( 'Checked and dropped preprocess cdr table if existing. Elapsed time: {} seconds' .format(format_two_point_time(timer, time.time()))) timer = time.time() print('Creating preprocess cdr table.') raw_sql = sql_to_string('cdr_and_mapping/create_preprocess_cdr.sql') query = raw_sql.format(args=', '.join(arg_cdr_prep), provider_prefix=provider_prefix) cursor.execute(query) print('Created preprocess cdr table. Elapsed time: {} seconds'.format( format_two_point_time(timer, time.time()))) timer = time.time() print('Inserting into preprocess table') print('Columns in preprocess table mapped: ' + ', '.join(arg_cdr_map)) raw_sql = sql_to_string('cdr_and_mapping/insert_preprocess_cdr.sql') query = raw_sql.format(distinct=distinct, arg=', '.join(arg_cdr_map), provider_prefix=provider_prefix) cursor.execute(query) print('Inserted into preprocess cdr table. Elapsed time: {} seconds'. format(format_two_point_time(timer, time.time()))) print('########## FINISHED CREATING PREPROCESS CDR TABLE ##########')
def cell_tower_data_admin(self, admin): provider_prefix = self.config.provider_prefix check_invalid_lat_lng = self.config.check_invalid_lat_lng cursor = self.hc.cursor print('########## CREATE MAPPING ADMIN TABLE ##########') if check_invalid_lat_lng: check_lat_lng = 'and (latitude != 0 or longitude != 0) and latitude is not NULL and longitude is not NULL' else: check_lat_lng = '' print( 'Checking and dropping mapping {admin} table if existing.'.format( admin=admin)) timer = time.time() cursor.execute( 'DROP TABLE IF EXISTS {provider_prefix}_cell_tower_data_{admin}'. format(provider_prefix=provider_prefix, admin=admin)) print( 'Check and drop mapping {admin} table if existing. Elapsed time: {time} seconds' .format(admin=admin, time=format_two_point_time(timer, time.time()))) timer = time.time() print('Creating mapping {admin} table'.format(admin=admin)) raw_sql = sql_to_string('cdr_and_mapping/create_mapping_admin.sql') query = raw_sql.format(provider_prefix=provider_prefix, admin=admin) cursor.execute(query) print('Created mapping {admin} table. Elapsed time: {time} seconds'. format(admin=admin, time=format_two_point_time(timer, time.time()))) timer = time.time() print('Inserting into mapping {} table'.format(admin)) raw_sql = sql_to_string('cdr_and_mapping/insert_mapping_admin.sql') query = raw_sql.format(provider_prefix=provider_prefix, admin=admin, check_lat_lng=check_lat_lng) cursor.execute(query) print( 'Inserted into mapping {admin} table. Elapsed time: {time} seconds' .format(admin=admin, time=format_two_point_time(timer, time.time()))) print('########## FINISHED CREATING MAPPING ADMIN TABLE ##########')
def create_trip_format(self): provider_prefix = self.config.provider_prefix cursor = self.hc.cursor print( '########## CREATE CDR BY UID ARRAY TRIP FORMAT TABLE ##########') timer = time.time() print( 'Checking and dropping {provider_prefix}_cdr_by_uid_trip table if existing.' .format(provider_prefix=provider_prefix)) cursor.execute( 'DROP TABLE IF EXISTS {provider_prefix}_cdr_by_uid_trip'.format( provider_prefix=provider_prefix)) print( 'Checked and dropped {provider_prefix}_cdr_by_uid_trip table if existing. ' 'Elapsed time: {time} seconds'.format( provider_prefix=provider_prefix, time=format_two_point_time(timer, time.time()))) timer = time.time() print('Creating {provider_prefix}_cdr_by_uid_trip table'.format( provider_prefix=provider_prefix)) raw_sql = sql_to_string('interpolation/create_trip_format.sql') query = raw_sql.format(provider_prefix=provider_prefix) cursor.execute(query) print( 'Created {provider_prefix}_cdr_by_uid_trip table. Elapsed time: {time} seconds' .format(provider_prefix=provider_prefix, time=format_two_point_time(timer, time.time()))) timer = time.time() raw_sql = sql_to_string('interpolation/insert_trip_format.sql') print('Inserting into {provider_prefix}_cdr_by_uid_trip table'.format( provider_prefix=provider_prefix)) query = raw_sql.format(provider_prefix=provider_prefix) cursor.execute(query) print( 'Inserted into {provider_prefix}_cdr_by_uid_trip table. Elapsed time: {time} seconds' .format(provider_prefix=provider_prefix, time=format_two_point_time(timer, time.time()))) print( '########## FINISHED CREATING CDR BY UID TRIP FORMAT TABLE ##########' )
def create_trip_24hr_padding(self): provider_prefix = self.config.provider_prefix cursor = self.hc.cursor print('########## CREATE TRIP 24 HR PADDING TABLE ##########') timer = time.time() print( 'Checking and dropping {provider_prefix}_cdr_by_uid_trip_organized_array_apd table if existing.' .format(provider_prefix=provider_prefix)) cursor.execute( 'DROP TABLE IF EXISTS {provider_prefix}_cdr_by_uid_trip_organized_array_apd' .format(provider_prefix=provider_prefix)) print( 'Checked and dropped {provider_prefix}_cdr_by_uid_trip_organized_array_apd table if existing. ' 'Elapsed time: {time} seconds'.format( provider_prefix=provider_prefix, time=format_two_point_time(timer, time.time()))) timer = time.time() print( 'Creating {provider_prefix}_cdr_by_uid_trip_organized_array_apd table' .format(provider_prefix=provider_prefix)) raw_sql = sql_to_string('interpolation/create_trip_24_hr_padding.sql') query = raw_sql.format(provider_prefix=provider_prefix) cursor.execute(query) print( 'Created {provider_prefix}_cdr_by_uid_trip_organized_array_apd table. Elapsed time: {time} seconds' .format(provider_prefix=provider_prefix, time=format_two_point_time(timer, time.time()))) timer = time.time() raw_sql = sql_to_string('interpolation/insert_trip_24_hr_padding.sql') print( 'Inserting into {provider_prefix}_cdr_by_uid_trip_organized_array_apd table' .format(provider_prefix=provider_prefix)) query = raw_sql.format(provider_prefix=provider_prefix) cursor.execute(query) print( 'Inserted into {provider_prefix}_cdr_by_uid_trip_organized_array_apd table. Elapsed time: {time} seconds' .format(provider_prefix=provider_prefix, time=format_two_point_time(timer, time.time()))) print('########## FINISHED TRIP 24 HR PADDING TABLE ##########')
def execute_multiple(cursor,package,sql_filename,sql_params,timer): print('Execute multiple queries...') raw_sql = sql_to_string('{package}/{sql_filename}.sql'.format(package=package,sql_filename=sql_filename)) # query = raw_sql.format(provider_prefix=provider_prefix, cdr_data_table=cdr_data_table) # params= {'provider_prefix':provider_prefix,'cdr_data_table':cdr_data_table} query = raw_sql.format(**sql_params) # cursor.execute(query) qList = query.split(";") for q in qList: if len(q.strip()) > 0: print('Execute {q}'.format(q=q)) cursor.execute(q) print('Finised Execute multiple queries. Elapsed time: {time} seconds'.format(time=format_two_point_time(timer, time.time())))
def export_to_csv(self): provider_prefix = self.config.provider_prefix cursor = self.hc.cursor print('########## Exporting route interpolation to CSV ##########') timer = time.time() raw_sql = sql_to_string('interpolation/export_to_gps_format.sql') query = raw_sql.format(provider_prefix=provider_prefix) cursor.execute(query) print('Exported to CSV. Elapsed time: {time} seconds'.format( provider_prefix=provider_prefix, time=format_two_point_time(timer, time.time()))) print( '########## FINISHED EXPORTING, FILE LOCATED IN /tmp/hive/cdr_interpolation ##########' )
def consolidate_table(self): # TODO join here provider_prefix = self.config.provider_prefix arg_cdr_prep = self.data.arg_cdr_prep arg_cdr_con = self.data.arg_cdr_con cursor = self.hc.cursor print('########## CREATE CONSOLIDATE CDR TABLE ##########') print('Checking and dropping consolidate cdr table if existing.') print('Checking latitude and lontitude in the preprocess table') cursor.execute( 'select max(latitude), max(longitude) from {provider_prefix}_preprocess' .format(provider_prefix=provider_prefix)) res = cursor.fetchall() latitude = res[0][0] longitude = res[0][1] arg_cdr_con_with_join_cond = [] if (latitude == -1 and longitude == -1): print('Join to make consolidate') for arg in arg_cdr_con: if str.lower(arg) in ['longitude', 'latitude']: arg_cdr_con_with_join_cond.append('a2.' + arg + ' as ' + arg) else: arg_cdr_con_with_join_cond.append('a1.' + arg + ' as ' + arg) insert_script_loc = 'cdr_and_mapping/insert_consolidate_cdr_join.sql' else: arg_cdr_con_with_join_cond = arg_cdr_con print('No join') insert_script_loc = 'cdr_and_mapping/insert_consolidate_cdr.sql' timer = time.time() cursor.execute( 'DROP TABLE IF EXISTS {provider_prefix}_consolidate_data_all'. format(provider_prefix=provider_prefix)) print( 'Checked and dropped preprocess cdr table if existing. Elapsed time: {} seconds' .format(format_two_point_time(timer, time.time()))) timer = time.time() print('Creating consolidate table') raw_sql = sql_to_string('cdr_and_mapping/create_consolidate_cdr.sql') query = raw_sql.format(provider_prefix=provider_prefix, arg_prep=' ,'.join(arg_cdr_prep)) cursor.execute(query) print('Created consolidate cdr table. Elapsed time: {} seconds'.format( format_two_point_time(timer, time.time()))) timer = time.time() print('Columns in consolidate table: ' + ', '.join(arg_cdr_con_with_join_cond)) print('Inserting into the consolidate table') raw_sql = sql_to_string(insert_script_loc) query = raw_sql.format(provider_prefix=provider_prefix, arg_con=', '.join(arg_cdr_con_with_join_cond)) cursor.execute(query) print('Inserted into consolidate cdr table. Elapsed time: {} seconds'. format(format_two_point_time(timer, time.time()))) print('########## FINISHED CREATING CONSOLIDATE CDR TABLE ##########')
def export_indicator(cursor,package,provider_prefix,table_name,output_data_path,output_table_name,timer): raw_sql = sql_to_string('{package}/{table_name}_export.sql'.format(package=package,table_name=table_name)) query = raw_sql.format(provider_prefix=provider_prefix) cursor.execute(query) file_path = '{output_data_path}/{provider_prefix}_{output_table_name}.csv'.format(provider_prefix=provider_prefix,output_data_path=output_data_path,output_table_name=output_table_name) export_to_csv(file_path,cursor)