Ejemplo n.º 1
0
    def import_cell_tower_data_raw(self):
        """Rebuild the raw cell tower table and load the configured files into it.

        Drops ``{provider_prefix}_cell_tower_data_raw`` if it exists, recreates
        it from ``cdr_and_mapping/create_raw_mapping.sql`` and then issues one
        ``load data local inpath`` statement per entry of
        ``config.input_cell_tower_files`` (file names are appended directly to
        ``config.hadoop_data_path``). The first file is loaded with
        ``overwrite``; every following file is appended. If no input file is
        configured, a hint is printed and the method returns after the table
        has been recreated.
        """
        settings = self.config
        prefix = settings.provider_prefix
        raw_columns = self.data.arg_cell_raw
        delimiter = settings.input_cell_tower_delimiter
        have_header = settings.input_cell_tower_have_header
        source_files = settings.input_cell_tower_files
        data_dir = settings.hadoop_data_path
        hive = self.hc.cursor

        print('########## IMPORT RAW MAPPING TABLE ##########')
        print('Checking and dropping raw mapping table if existing.')
        started = time.time()
        drop_statement = 'DROP TABLE IF EXISTS {provider_prefix}_cell_tower_data_raw'.format(
            provider_prefix=prefix)
        hive.execute(drop_statement)
        elapsed = format_two_point_time(started, time.time())
        print(
            'Checked and dropped raw mapping table if existing. Elapsed time: {} seconds'
            .format(elapsed))
        started = time.time()

        print('Creating raw mapping table')
        create_template = sql_to_string('cdr_and_mapping/create_raw_mapping.sql')
        hive.execute(
            create_template.format(provider_prefix=prefix,
                                   arg_raw=', '.join(raw_columns),
                                   field_delimiter=delimiter,
                                   have_header=have_header))
        elapsed = format_two_point_time(started, time.time())
        print('Created raw mapping table. Elapsed time: {} seconds'.format(elapsed))
        started = time.time()

        # Nothing to load: report the configuration problem and stop here.
        if len(source_files) == 0:
            print(
                'Please check the input_cell_tower_files field in config.json and make sure the file is valid.'
            )
            return

        for position, file_name in enumerate(source_files):
            # Only the first file replaces the table contents; the rest are appended.
            if position == 0:
                target_clause = "overwrite into table {provider_prefix}_cell_tower_data_raw"
            else:
                target_clause = "into table {provider_prefix}_cell_tower_data_raw"
            source_clause = "load data local inpath '{hadoop_data_path}{hadoop_data_file}' ".format(
                hadoop_data_path=data_dir, hadoop_data_file=file_name)
            hive.execute(source_clause + target_clause.format(provider_prefix=prefix))

        elapsed = format_two_point_time(started, time.time())
        print('Imported to raw mapping table. Elapsed time: {} seconds'.format(elapsed))
        print('########## FINISHED IMPORTING TO RAW MAPPING TABLE ##########')
def main():
    """Command line entry point for the interpolation run.

    Reads the ``-c/--config`` option, builds the ``Config`` from it, constructs
    the ``HiveConnection``, runs the mandatory Hive initialisation commands via
    ``HiveTableCreator.initialize`` and finally calls
    ``Interpolation.calculate_interpolation``. The overall elapsed time is
    printed at the end.
    """
    started_at = time.time()

    # command line interface: one option naming the configuration file
    cli = argparse.ArgumentParser(
        description='Argument indicating the configuration file')
    cli.add_argument(
        "-c",
        "--config",
        action="store",
        help="add a configuration file you would like to process the cdr data"
        " \n ex. py py_hive_connect.py -c config.json")
    options = cli.parse_args()

    settings = Config(options.config)
    connection = HiveConnection(host=settings.host,
                                port=settings.port,
                                user=settings.user)

    # mandatory: initialise hive before anything else runs
    creator = HiveTableCreator(settings)
    creator.initialize(
        'hive_init_commands/initial_hive_commands_interpolation.json')

    # build the interpolation generator and run it
    interpolation = Interpolation(settings)
    interpolation.calculate_interpolation()

    total = format_two_point_time(started_at, time.time())
    print('Overall time elapsed: {} seconds'.format(total))
 def initialize(self, init_cmd_file):
     """Execute the Hive initialisation commands and create the output folders.

     Each entry of the ``hive_commands`` list in the JSON file
     ``init_cmd_file`` is executed in order. A command starting with ``use``
     gets ``db_name`` substituted; otherwise the first of ``{poi_location}``,
     ``{osm_location}`` or ``{voronoi_location}`` found in the command is
     substituted from the matching ``config.interpolation_*_file_location``
     value. Commands without any of these are executed unchanged. Afterwards
     the report and graph output directories are created if missing.
     """
     print('########## Initilizing Hive ##########')
     started = time.time()
     report_dir = self.config.output_report_location
     graph_dir = self.config.output_graph_location
     hive = self.hc.cursor

     # (token searched in the command, format keyword, config attribute).
     # The attribute is looked up only when its token is actually present.
     location_placeholders = (
         ('{poi_location}', 'poi_location',
          'interpolation_poi_file_location'),
         ('{osm_location}', 'osm_location',
          'interpolation_osm_file_location'),
         ('{voronoi_location}', 'voronoi_location',
          'interpolation_voronoi_file_location'),
     )

     for statement in json_file_to_object(init_cmd_file)['hive_commands']:
         if statement.startswith('use'):
             statement = statement.format(db_name=self.config.db_name)
         else:
             for token, keyword, attribute in location_placeholders:
                 if token in statement:
                     value = getattr(self.config, attribute)
                     statement = statement.format(**{keyword: value})
                     break  # only the first matching placeholder is filled
         hive.execute(statement)

     for directory in (report_dir, graph_dir):
         if not os.path.exists(directory):
             os.makedirs(directory)

     elapsed = format_two_point_time(started, time.time())
     print('########## Done. Time elapsed: {} seconds ##########'.format(
         elapsed))
Ejemplo n.º 4
0
def calculate_indicator(cursor, package, provider_prefix, table_name, cdr_data_table, timer):
    """Run the SQL script ``{package}/{table_name}.sql`` for one indicator table.

    Args:
        cursor: object whose ``execute`` method receives the final query.
        package: folder part of the script path.
        table_name: script base name; also used in the progress messages.
        provider_prefix: substituted into the script and the messages.
        cdr_data_table: substituted into the script.
        timer: start timestamp used for the elapsed-time message.
    """
    labels = {'provider_prefix': provider_prefix, 'table_name': table_name}
    print('Calculating {provider_prefix}_{table_name} table'.format(**labels))

    script_path = '{package}/{table_name}.sql'.format(package=package, table_name=table_name)
    statement = sql_to_string(script_path).format(
        provider_prefix=provider_prefix, cdr_data_table=cdr_data_table)
    cursor.execute(statement)

    elapsed = format_two_point_time(timer, time.time())
    print('Created {provider_prefix}_{table_name} table. Elapsed time: {time} seconds'
          .format(time=elapsed, **labels))
Ejemplo n.º 5
0
    def create_od_sum(self):
        """Rebuild the OD sum table and run the OD export script.

        Drops ``{provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od_sum`` if
        it exists, then executes the ``origin_destination`` create and insert
        scripts for it, followed by ``origin_destination/od_to_csv.sql``.
        Progress and elapsed times are printed along the way.
        """
        prefix = self.config.provider_prefix
        hive = self.hc.cursor

        def run_script(script_path):
            # Read an SQL template, substitute the provider prefix and execute it.
            template = sql_to_string(script_path)
            hive.execute(template.format(provider_prefix=prefix))

        print('########## CREATING OD SUM TABLE ##########')
        started = time.time()
        print(
            'Checking and dropping {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od_sum table if existing.'
            .format(provider_prefix=prefix))
        hive.execute(
            'DROP TABLE IF EXISTS {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od_sum '
            .format(provider_prefix=prefix))
        elapsed = format_two_point_time(started, time.time())
        print(
            'Checked and dropped {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od_sum table if existing. '
            'Elapsed time: {time} seconds'.format(provider_prefix=prefix,
                                                  time=elapsed))

        print(
            'Creating {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od_sum table'
            .format(provider_prefix=prefix))
        run_script(
            'origin_destination/create_la_cdr_all_with_ant_zone_by_uid_od_sum.sql'
        )
        print(
            'Created {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od_sum table'
            .format(provider_prefix=prefix))

        # the insert step is timed on its own
        started = time.time()
        print(
            'Inserting into {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od_sum table'
            .format(provider_prefix=prefix))
        run_script(
            'origin_destination/insert_la_cdr_all_with_ant_zone_by_uid_od_sum.sql'
        )
        elapsed = format_two_point_time(started, time.time())
        print(
            'Inserted into {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od_sum table. '
            'Elapsed time: {time} seconds'.format(provider_prefix=prefix,
                                                  time=elapsed))

        run_script('origin_destination/od_to_csv.sql')
        print('OD Result is stored in /tmp/hive/od_result')
        print('########## FINISHED CREATING OD SUM TABLE ##########')
Ejemplo n.º 6
0
    def create_route_interpolation(self):
        """Rebuild the route interpolation table.

        Drops ``{provider_prefix}_cdr_by_uid_trip_routing_array_apd`` if it
        exists, recreates it with ``interpolation/create_route_interpolation.sql``
        and fills it with ``interpolation/insert_route_interpolation.sql``.
        The insert script additionally receives ``max_size_interpolation`` and
        the last ``/``-separated component of the configured OSM and voronoi
        file locations.
        """
        prefix = self.config.provider_prefix
        hive = self.hc.cursor

        def elapsed_since(moment):
            # Formatted time between ``moment`` and now.
            return format_two_point_time(moment, time.time())

        print('########## CREATE ROUTE INTERPOLATION TABLE ##########')
        started = time.time()
        print(
            'Checking and dropping {provider_prefix}_cdr_by_uid_trip_routing_array_apd table if existing.'
            .format(provider_prefix=prefix))
        hive.execute(
            'DROP TABLE IF EXISTS {provider_prefix}_cdr_by_uid_trip_routing_array_apd'
            .format(provider_prefix=prefix))
        print(
            'Checked and dropped {provider_prefix}_cdr_by_uid_trip_routing_array_apd table if existing. '
            'Elapsed time: {time} seconds'.format(
                provider_prefix=prefix, time=elapsed_since(started)))

        started = time.time()
        print(
            'Creating {provider_prefix}_cdr_by_uid_trip_routing_array_apd table'
            .format(provider_prefix=prefix))
        create_template = sql_to_string(
            'interpolation/create_route_interpolation.sql')
        hive.execute(create_template.format(provider_prefix=prefix))
        print(
            'Created {provider_prefix}_cdr_by_uid_trip_routing_array_apd table. Elapsed time: {time} seconds'
            .format(provider_prefix=prefix, time=elapsed_since(started)))

        started = time.time()
        insert_template = sql_to_string(
            'interpolation/insert_route_interpolation.sql')
        print(
            'Inserting into {provider_prefix}_cdr_by_uid_trip_routing_array_apd table'
            .format(provider_prefix=prefix))
        # Only the final path component of each location is passed to the script.
        insert_statement = insert_template.format(
            provider_prefix=prefix,
            max_size_interpolation=self.config.max_size_interpolation,
            osm=self.config.interpolation_osm_file_location.rsplit('/', 1)[-1],
            voronoi=self.config.interpolation_voronoi_file_location.rsplit(
                '/', 1)[-1])
        hive.execute(insert_statement)
        print(
            'Inserted into {provider_prefix}_cdr_by_uid_trip_routing_array_apd table. Elapsed time: {time} seconds'
            .format(provider_prefix=prefix, time=elapsed_since(started)))
        print('########## FINISHED ROUTE INTERPOLATION TABLE ##########')
Ejemplo n.º 7
0
    def create_od(self):
        """Rebuild the origin-destination (OD) table.

        Drops ``{provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od`` if it
        exists, recreates it with the ``origin_destination`` create script and
        fills it with the insert script, passing ``config.od_admin_unit`` as
        ``target_unit``. Progress and elapsed times are printed for each step.
        """
        provider_prefix = self.config.provider_prefix
        od_admin_unit = self.config.od_admin_unit
        cursor = self.hc.cursor
        print('########## CREATE OD TABLE ##########')
        timer = time.time()
        print(
            'Checking and dropping {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od table if existing.'
            .format(provider_prefix=provider_prefix))
        cursor.execute(
            'DROP TABLE IF EXISTS {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od'
            .format(provider_prefix=provider_prefix))

        # The message names the table that was actually dropped (the "_od" table).
        print(
            'Checked and dropped {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od table if existing.'
            ' Elapsed time: {time} seconds'.format(
                provider_prefix=provider_prefix,
                time=format_two_point_time(timer, time.time())))
        print(
            'Creating {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od table'
            .format(provider_prefix=provider_prefix))
        timer = time.time()
        raw_sql = sql_to_string(
            'origin_destination/create_la_cdr_all_with_ant_zone_by_uid_od.sql')
        query = raw_sql.format(provider_prefix=provider_prefix)
        cursor.execute(query)

        # Elapsed time is reported with its unit, like every other step.
        print(
            'Created {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od table. Elapsed time: {time} seconds'
            .format(provider_prefix=provider_prefix,
                    time=format_two_point_time(timer, time.time())))
        timer = time.time()
        print(
            'Inserting into {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od table'
            .format(provider_prefix=provider_prefix))
        raw_sql = sql_to_string(
            'origin_destination/insert_la_cdr_all_with_ant_zone_by_uid_od.sql')
        query = raw_sql.format(provider_prefix=provider_prefix,
                               target_unit=od_admin_unit)
        cursor.execute(query)

        print(
            'Inserted into {provider_prefix}_la_cdr_all_with_ant_zone_by_uid_od table. Elapsed time: {time} seconds'
            .format(provider_prefix=provider_prefix,
                    time=format_two_point_time(timer, time.time())))
        print('########## FINISHED CREATING OD TABLE ##########')
Ejemplo n.º 8
0
    def preprocess_cell_tower_data(self):
        """Rebuild the preprocess cell tower mapping table.

        Drops ``{provider_prefix}_cell_tower_data_preprocess`` if it exists,
        recreates it with ``cdr_and_mapping/create_preprocess_mapping.sql``
        (columns from ``data.arg_cell_create``) and fills it with
        ``cdr_and_mapping/insert_preprocess_mapping.sql`` (columns from
        ``data.arg_cell_map``). When ``config.check_duplicate`` is truthy the
        insert script receives the keyword ``distinct``, otherwise an empty
        string.
        """
        prefix = self.config.provider_prefix
        drop_duplicates = self.config.check_duplicate
        create_columns = self.data.arg_cell_create
        mapped_columns = self.data.arg_cell_map
        hive = self.hc.cursor

        def elapsed_since(moment):
            # Formatted time between ``moment`` and now.
            return format_two_point_time(moment, time.time())

        print('########## CREATE PREPROCESS MAPPING TABLE ##########')
        distinct_keyword = 'distinct' if drop_duplicates else ''

        print('Checking and dropping preprocess mapping table if existing.')
        started = time.time()
        hive.execute(
            'DROP TABLE IF EXISTS {provider_prefix}_cell_tower_data_preprocess'
            .format(provider_prefix=prefix))
        print(
            'Checked and dropped preprocess mapping table if existing. Elapsed time: {} seconds'
            .format(elapsed_since(started)))

        started = time.time()
        print('Creating preprocess mapping table')
        create_template = sql_to_string(
            'cdr_and_mapping/create_preprocess_mapping.sql')
        hive.execute(
            create_template.format(provider_prefix=prefix,
                                   arg_create=', '.join(create_columns)))
        print('Created mapping preprocess table. Elapsed time: {} seconds'.
              format(elapsed_since(started)))

        started = time.time()
        # need username to get privilege
        print('Inserting into preprocess mapping table')
        insert_template = sql_to_string(
            'cdr_and_mapping/insert_preprocess_mapping.sql')
        hive.execute(
            insert_template.format(provider_prefix=prefix,
                                   distinct=distinct_keyword,
                                   arg=', '.join(mapped_columns)))
        print(
            'Inserted into preprocess mapping table. Elapsed time: {} seconds'.
            format(elapsed_since(started)))
        print(
            '########## FINISHED CREATING PREPROCESS MAPPING TABLE ##########')
Ejemplo n.º 9
0
    def cell_tower_data_admin(self, admin):
        """Rebuild the cell tower mapping table for one admin unit.

        Drops ``{provider_prefix}_cell_tower_data_{admin}`` if it exists,
        recreates it with ``cdr_and_mapping/create_mapping_admin.sql`` and
        fills it with ``cdr_and_mapping/insert_mapping_admin.sql``. When
        ``config.check_invalid_lat_lng`` is truthy the insert script receives
        an extra condition excluding rows whose latitude and longitude are both
        0 or where either is NULL; otherwise the condition is empty.

        Args:
            admin: admin unit name used in the table name and the messages.
        """
        prefix = self.config.provider_prefix
        filter_invalid = self.config.check_invalid_lat_lng
        hive = self.hc.cursor

        def elapsed_since(moment):
            # Formatted time between ``moment`` and now.
            return format_two_point_time(moment, time.time())

        print('########## CREATE MAPPING ADMIN TABLE ##########')
        lat_lng_condition = (
            'and (latitude != 0 or longitude != 0) and latitude is not NULL and longitude is not NULL'
            if filter_invalid else '')

        print(
            'Checking and dropping mapping {admin} table if existing.'.format(
                admin=admin))
        started = time.time()
        hive.execute(
            'DROP TABLE IF EXISTS {provider_prefix}_cell_tower_data_{admin}'.
            format(provider_prefix=prefix, admin=admin))
        print(
            'Check and drop mapping {admin} table if existing. Elapsed time: {time} seconds'
            .format(admin=admin, time=elapsed_since(started)))

        started = time.time()
        print('Creating mapping {admin} table'.format(admin=admin))
        create_template = sql_to_string(
            'cdr_and_mapping/create_mapping_admin.sql')
        hive.execute(create_template.format(provider_prefix=prefix,
                                            admin=admin))
        print('Created mapping {admin} table. Elapsed time: {time} seconds'.
              format(admin=admin, time=elapsed_since(started)))

        started = time.time()
        print('Inserting into mapping {} table'.format(admin))
        insert_template = sql_to_string(
            'cdr_and_mapping/insert_mapping_admin.sql')
        hive.execute(
            insert_template.format(provider_prefix=prefix,
                                   admin=admin,
                                   check_lat_lng=lat_lng_condition))
        print(
            'Inserted into mapping {admin} table. Elapsed time: {time} seconds'
            .format(admin=admin, time=elapsed_since(started)))
        print('########## FINISHED CREATING MAPPING ADMIN TABLE ##########')
Ejemplo n.º 10
0
    def preprocess_data(self):
        """Rebuild the preprocess CDR table.

        Drops ``{provider_prefix}_preprocess`` if it exists, recreates it with
        ``cdr_and_mapping/create_preprocess_cdr.sql`` (columns from
        ``data.arg_cdr_prep``) and fills it with
        ``cdr_and_mapping/insert_preprocess_cdr.sql`` (columns from
        ``data.arg_cdr_map``). When ``config.check_duplicate`` is truthy the
        insert script receives the keyword ``distinct``, otherwise an empty
        string.
        """
        prefix = self.config.provider_prefix
        drop_duplicates = self.config.check_duplicate
        prep_columns = self.data.arg_cdr_prep
        map_columns = self.data.arg_cdr_map
        hive = self.hc.cursor

        def elapsed_since(moment):
            # Formatted time between ``moment`` and now.
            return format_two_point_time(moment, time.time())

        print('########## CREATE PREPROCESS CDR TABLE ##########')
        distinct_keyword = 'distinct' if drop_duplicates else ''

        print('Checking and dropping preprocess cdr table if existing.')
        started = time.time()
        hive.execute('DROP TABLE IF EXISTS {provider_prefix}_preprocess'.format(
            provider_prefix=prefix))
        print(
            'Checked and dropped preprocess cdr table if existing. Elapsed time: {} seconds'
            .format(elapsed_since(started)))

        started = time.time()
        print('Creating preprocess cdr table.')
        create_template = sql_to_string(
            'cdr_and_mapping/create_preprocess_cdr.sql')
        hive.execute(
            create_template.format(args=', '.join(prep_columns),
                                   provider_prefix=prefix))
        print('Created preprocess cdr table. Elapsed time: {} seconds'.format(
            elapsed_since(started)))

        started = time.time()
        print('Inserting into preprocess table')
        # The same comma-separated column list is shown and sent to the script.
        mapped_list = ', '.join(map_columns)
        print('Columns in preprocess table mapped: ' + mapped_list)
        insert_template = sql_to_string(
            'cdr_and_mapping/insert_preprocess_cdr.sql')
        hive.execute(
            insert_template.format(distinct=distinct_keyword,
                                   arg=mapped_list,
                                   provider_prefix=prefix))
        print('Inserted into preprocess cdr table. Elapsed time: {} seconds'.
              format(elapsed_since(started)))
        print('########## FINISHED CREATING PREPROCESS CDR TABLE ##########')
Ejemplo n.º 11
0
    def create_trip_format(self):
        """Rebuild the trip format table.

        Drops ``{provider_prefix}_cdr_by_uid_trip`` if it exists, recreates it
        with ``interpolation/create_trip_format.sql`` and fills it with
        ``interpolation/insert_trip_format.sql``. Progress and elapsed times
        are printed for each step.
        """
        prefix = self.config.provider_prefix
        hive = self.hc.cursor

        def elapsed_since(moment):
            # Formatted time between ``moment`` and now.
            return format_two_point_time(moment, time.time())

        print(
            '########## CREATE CDR BY UID ARRAY TRIP FORMAT TABLE ##########')
        started = time.time()
        print(
            'Checking and dropping {provider_prefix}_cdr_by_uid_trip table if existing.'
            .format(provider_prefix=prefix))
        hive.execute('DROP TABLE IF EXISTS {provider_prefix}_cdr_by_uid_trip'.format(
            provider_prefix=prefix))
        print(
            'Checked and dropped {provider_prefix}_cdr_by_uid_trip  table if existing. '
            'Elapsed time: {time} seconds'.format(
                provider_prefix=prefix, time=elapsed_since(started)))

        started = time.time()
        print('Creating {provider_prefix}_cdr_by_uid_trip table'.format(
            provider_prefix=prefix))
        create_template = sql_to_string('interpolation/create_trip_format.sql')
        hive.execute(create_template.format(provider_prefix=prefix))
        print(
            'Created {provider_prefix}_cdr_by_uid_trip table. Elapsed time: {time} seconds'
            .format(provider_prefix=prefix, time=elapsed_since(started)))

        started = time.time()
        insert_template = sql_to_string('interpolation/insert_trip_format.sql')
        print('Inserting into {provider_prefix}_cdr_by_uid_trip table'.format(
            provider_prefix=prefix))
        hive.execute(insert_template.format(provider_prefix=prefix))
        print(
            'Inserted into {provider_prefix}_cdr_by_uid_trip table. Elapsed time: {time} seconds'
            .format(provider_prefix=prefix, time=elapsed_since(started)))
        print(
            '########## FINISHED CREATING CDR BY UID TRIP FORMAT TABLE ##########'
        )
def main():
    """Command line entry point for the WB indicators run.

    Reads the ``-c/--config`` option, builds the ``Config`` from it, constructs
    the ``HiveConnection``, runs the mandatory Hive initialisation commands and
    then calculates and exports the indicators on a ``WBIndicators`` instance.
    The overall elapsed time is printed at the end.
    """
    started_at = time.time()

    # command line interface: one option naming the configuration file
    cli = argparse.ArgumentParser(
        description='Argument indicating the configuration file')
    cli.add_argument(
        "-c",
        "--config",
        action="store",
        help="add a configuration file you would like to process the cdr data"
        " \n ex. py run_wb_indicators.py -c config.json")
    options = cli.parse_args()

    settings = Config(options.config)
    # connection parameters are taken straight from the instance dictionary
    raw_settings = vars(settings)
    HiveConnection(host=raw_settings["host"],
                   port=raw_settings["port"],
                   user=raw_settings["user"])

    # mandatory: initialise hive before anything else runs
    creator = HiveTableCreator(settings)
    creator.initialize(
        'hive_init_commands/initial_hive_commands_wb_indicators.json')

    # init indicators generators
    indicators = WBIndicators(settings)

    # user section here
    # run command
    indicators.calculate_indicator_01_02_03()
    indicators.calculate_indicator_06_11()
    indicators.calculate_indicator_04()
    # need to run after "calculate_indicator_06_11"
    indicators.calculate_indicator_09()
    # need to run after "calculate_indicator_09"
    indicators.calculate_indicator_10()
    # need to run after "calculate_indicator_10"
    indicators.calculate_indicator_05()
    # need to run after "calculate_indicator_06_11,calculate_indicator_10"
    indicators.calculate_indicator_07_08()

    # export output
    indicators.export_indicator_01_02_03()
    indicators.export_indicator_06_11()
    indicators.export_indicator_04()
    indicators.export_indicator_09()
    indicators.export_indicator_10()
    indicators.export_indicator_05()
    indicators.export_indicator_07_08()

    total = format_two_point_time(started_at, time.time())
    print('Overall time elapsed: {} seconds'.format(total))
Ejemplo n.º 13
0
    def create_trip_24hr_padding(self):
        """Rebuild the 24 hour padding trip table.

        Drops ``{provider_prefix}_cdr_by_uid_trip_organized_array_apd`` if it
        exists, recreates it with ``interpolation/create_trip_24_hr_padding.sql``
        and fills it with ``interpolation/insert_trip_24_hr_padding.sql``.
        Progress and elapsed times are printed for each step.
        """
        prefix = self.config.provider_prefix
        hive = self.hc.cursor

        def elapsed_since(moment):
            # Formatted time between ``moment`` and now.
            return format_two_point_time(moment, time.time())

        print('########## CREATE TRIP 24 HR PADDING TABLE ##########')
        started = time.time()
        print(
            'Checking and dropping {provider_prefix}_cdr_by_uid_trip_organized_array_apd table if existing.'
            .format(provider_prefix=prefix))
        hive.execute(
            'DROP TABLE IF EXISTS {provider_prefix}_cdr_by_uid_trip_organized_array_apd'
            .format(provider_prefix=prefix))
        print(
            'Checked and dropped {provider_prefix}_cdr_by_uid_trip_organized_array_apd table if existing. '
            'Elapsed time: {time} seconds'.format(
                provider_prefix=prefix, time=elapsed_since(started)))

        started = time.time()
        print(
            'Creating {provider_prefix}_cdr_by_uid_trip_organized_array_apd table'
            .format(provider_prefix=prefix))
        create_template = sql_to_string(
            'interpolation/create_trip_24_hr_padding.sql')
        hive.execute(create_template.format(provider_prefix=prefix))
        print(
            'Created {provider_prefix}_cdr_by_uid_trip_organized_array_apd table. Elapsed time: {time} seconds'
            .format(provider_prefix=prefix, time=elapsed_since(started)))

        started = time.time()
        insert_template = sql_to_string(
            'interpolation/insert_trip_24_hr_padding.sql')
        print(
            'Inserting into {provider_prefix}_cdr_by_uid_trip_organized_array_apd table'
            .format(provider_prefix=prefix))
        hive.execute(insert_template.format(provider_prefix=prefix))
        print(
            'Inserted into {provider_prefix}_cdr_by_uid_trip_organized_array_apd table. Elapsed time: {time} seconds'
            .format(provider_prefix=prefix, time=elapsed_since(started)))
        print('########## FINISHED TRIP 24 HR PADDING TABLE ##########')
Ejemplo n.º 14
0
 def export_to_csv(self):
     """Run the route interpolation export script.

     Executes ``interpolation/export_to_gps_format.sql`` with the provider
     prefix substituted and prints the elapsed time. The closing message
     names ``/tmp/hive/cdr_interpolation`` as the output location.
     """
     prefix = self.config.provider_prefix
     hive = self.hc.cursor
     print('########## Exporting route interpolation to CSV ##########')
     started = time.time()
     export_template = sql_to_string('interpolation/export_to_gps_format.sql')
     hive.execute(export_template.format(provider_prefix=prefix))
     elapsed = format_two_point_time(started, time.time())
     print('Exported to CSV. Elapsed time: {time} seconds'.format(
         time=elapsed))
     print(
         '########## FINISHED EXPORTING, FILE LOCATED IN /tmp/hive/cdr_interpolation ##########'
     )
Ejemplo n.º 15
0
 def create_tables(self):
     """Run every table-building step in order and report the total time.

     The cell tower steps run first (raw import, preprocess, then one admin
     table per unit returned by ``get_admin_units_from_mapping`` for
     ``config.cdr_cell_tower``), followed by the CDR steps (raw import,
     preprocess, consolidate).
     """
     print('########## Creating Tables ##########')
     started = time.time()

     # cell tower mapping tables
     self.import_cell_tower_data_raw()
     self.preprocess_cell_tower_data()
     for admin_unit in get_admin_units_from_mapping(
             self.config.cdr_cell_tower):
         self.cell_tower_data_admin(admin_unit)

     # CDR tables
     self.import_raw()
     self.preprocess_data()
     self.consolidate_table()

     elapsed = format_two_point_time(started, time.time())
     print(
         '########## Done create all tables. Time elapsed: {} seconds ##########'
         .format(elapsed))
Ejemplo n.º 16
0
def main():
    """Command line entry point for the statistics run.

    Reads the ``-c/--config`` option, builds the ``Config`` from it, constructs
    the ``HiveConnection``, runs the mandatory Hive initialisation commands and
    then calls the report, graph and frequent-location steps of a
    ``Statistics`` instance. The overall elapsed time is printed at the end.
    """
    started_at = time.time()

    # command line interface: one option naming the configuration file
    cli = argparse.ArgumentParser(
        description='Argument indicating the configuration file')
    cli.add_argument(
        "-c",
        "--config",
        action="store",
        help="add a configuration file you would like to process the cdr data"
        " \n ex. py py_hive_connect.py -c config.json")
    options = cli.parse_args()

    settings = Config(options.config)
    HiveConnection(host=settings.host,
                   port=settings.port,
                   user=settings.user)

    # mandatory: initialise hive before anything else runs
    creator = HiveTableCreator(settings)
    creator.initialize('hive_init_commands/initial_hive_commands_stats.json')

    # init stats generators
    stats = Statistics(settings)

    # user section here
    # reports
    stats.calculate_data_statistics()
    stats.calculate_daily_statistics()
    stats.calculate_monthly_statistics()
    stats.calculate_zone_population()
    stats.calculate_summary()
    stats.calculate_user_date_histogram()

    # graphs
    stats.daily_cdrs()
    stats.daily_unique_users()
    stats.daily_unique_locations()
    stats.daily_average_cdrs()
    stats.daily_unique_average_locations()

    # frequent locations (Report)
    stats.frequent_locations()
    stats.frequent_locations_night()

    # Prerequisite for Origin-Destination, if not wishing to calculate OD, kindly comment the code
    # Require frequent_locations() in run_statistics.py
    stats.rank1_frequent_locations()

    total = format_two_point_time(started_at, time.time())
    print('Overall time elapsed: {} seconds'.format(total))
Ejemplo n.º 17
0
                plt.axvspan(690, 730, facecolor='darkred', alpha=0.2)
                plt.axvspan(760, 850, facecolor='gray', alpha=0.1)
                plt.savefig(os.path.join(output_path_graph, plot_sample_name)+"_min_max_avg.jpg")


            if (output_csv): 
                output_file_name = os.path.join(output_path_csv, plot_sample_name)+".csv"
                print('export_to_csv: {}'.format(output_file_name))
                if os.path.exists(output_file_name):
                    os.remove(output_file_name)
                
                data_t = c.data.transpose()
                data_t.insert(0, 'SAMPLE_CODE', plot_sample_name)
                data_t.index.name = 'ASD_FILE'
                data_t.to_csv(output_file_name, sep=',', encoding='utf-8')

            line_count += 1

    print(f'Processed {line_count} lines.')



# %%
# Report the time elapsed since ``start`` (assigned earlier in the script,
# outside this excerpt).
print('Overall time elapsed: {} seconds'.format(
    format_two_point_time(start, time.time())))


# %%



Ejemplo n.º 18
0
    def consolidate_table(self):
        """Rebuild ``{provider_prefix}_consolidate_data_all`` from the preprocess table.

        The maximum latitude and longitude of ``{provider_prefix}_preprocess``
        decide how the table is filled:

        * both equal ``-1``: ``insert_consolidate_cdr_join.sql`` is used and
          each column of ``data.arg_cdr_con`` is qualified with an alias,
          ``a2`` for latitude/longitude and ``a1`` for everything else
          (presumably ``-1`` marks CDR rows without coordinates so they are
          taken from the joined table; confirm against the SQL script);
        * otherwise: ``insert_consolidate_cdr.sql`` is used with the column
          list unchanged.

        The table itself is dropped if present and recreated with
        ``create_consolidate_cdr.sql`` using the columns of
        ``data.arg_cdr_prep``.
        """
        # TODO join here
        provider_prefix = self.config.provider_prefix
        arg_cdr_prep = self.data.arg_cdr_prep
        arg_cdr_con = self.data.arg_cdr_con
        cursor = self.hc.cursor
        print('########## CREATE CONSOLIDATE CDR TABLE ##########')
        print('Checking and dropping consolidate cdr table if existing.')

        print('Checking latitude and lontitude in the preprocess table')
        cursor.execute(
            'select max(latitude), max(longitude) from {provider_prefix}_preprocess'
            .format(provider_prefix=provider_prefix))
        res = cursor.fetchall()

        latitude = res[0][0]
        longitude = res[0][1]
        if latitude == -1 and longitude == -1:
            print('Join to make consolidate')
            # Coordinates come from alias a2, every other column from alias a1.
            arg_cdr_con_with_join_cond = [
                ('a2.' if arg.lower() in ('longitude', 'latitude') else 'a1.')
                + arg + ' as ' + arg
                for arg in arg_cdr_con
            ]
            insert_script_loc = 'cdr_and_mapping/insert_consolidate_cdr_join.sql'
        else:
            arg_cdr_con_with_join_cond = arg_cdr_con
            print('No join')
            insert_script_loc = 'cdr_and_mapping/insert_consolidate_cdr.sql'

        timer = time.time()
        cursor.execute(
            'DROP TABLE IF EXISTS {provider_prefix}_consolidate_data_all'.
            format(provider_prefix=provider_prefix))
        # The message names the consolidate table, which is what was dropped.
        print(
            'Checked and dropped consolidate cdr table if existing. Elapsed time: {} seconds'
            .format(format_two_point_time(timer, time.time())))
        timer = time.time()

        print('Creating consolidate table')
        raw_sql = sql_to_string('cdr_and_mapping/create_consolidate_cdr.sql')
        query = raw_sql.format(provider_prefix=provider_prefix,
                               arg_prep=' ,'.join(arg_cdr_prep))
        cursor.execute(query)
        print('Created consolidate cdr table. Elapsed time: {} seconds'.format(
            format_two_point_time(timer, time.time())))
        timer = time.time()

        print('Columns in consolidate table: ' +
              ', '.join(arg_cdr_con_with_join_cond))
        print('Inserting into the consolidate table')
        raw_sql = sql_to_string(insert_script_loc)
        query = raw_sql.format(provider_prefix=provider_prefix,
                               arg_con=', '.join(arg_cdr_con_with_join_cond))
        cursor.execute(query)
        print('Inserted into consolidate cdr table. Elapsed time: {} seconds'.
              format(format_two_point_time(timer, time.time())))
        print('########## FINISHED CREATING CONSOLIDATE CDR TABLE ##########')
    def export_indicator_01_02_03(self):
        """Export the results of indicators 1, 2 and 3.

        Calls ``export_indicator`` once per (source name, output name) pair
        listed below, in order, passing the ``wb_indicators`` package, the
        provider prefix, ``config.output_data_path`` and a fresh start
        timestamp for each export. The total elapsed time is printed at the
        end.

        The first pair exports ``indicator01_admin2_hour`` into
        ``indicator01_02_admin2_hour``, mirroring the admin3 pair; it must not
        reuse ``indicator03_admin0_date``, which has its own export below.
        """
        provider_prefix = self.config.provider_prefix
        output_data_path = self.config.output_data_path
        package = 'wb_indicators'
        cursor = self.hc.cursor

        print('########## export indicator_1_2_3 ##########')

        timer = time.time()

        # (source name, output name) in export order
        exports = (
            ('indicator01_admin2_hour', 'indicator01_02_admin2_hour'),
            ('indicator01_admin3_hour', 'indicator01_02_admin3_hour'),
            ('indicator03_admin0_date', 'indicator03_admin0_date'),
            ('indicator03_admin1_date', 'indicator03_admin1_date'),
            ('indicator03_admin2_date', 'indicator03_admin2_date'),
            ('indicator03_admin3_date', 'indicator03_admin3_date'),
        )
        for source_name, output_name in exports:
            export_indicator(cursor, package, provider_prefix, source_name,
                             output_data_path, output_name, time.time())

        print('########## FINISHED exprot indicator_1_2_3 ########## Elapsed time: {time} seconds'.format(time=format_two_point_time(timer, time.time())))
    def export_indicator_06_11(self):
        """Export the results of indicators 6 and 11.

        Calls ``export_indicator`` once per name listed below, in order, using
        the same name as source and output, the ``wb_indicators`` package, the
        provider prefix, ``config.output_data_path`` and a fresh start
        timestamp for each export. The total elapsed time is printed at the
        end.
        """
        prefix = self.config.provider_prefix
        target_dir = self.config.output_data_path
        package = 'wb_indicators'
        hive = self.hc.cursor

        print('########## export indicator_06_11 ##########')

        started = time.time()

        indicator_names = (
            'indicator06_admin3_week',
            'indicator06_admin2_week',
            'indicator11_admin3_month',
            'indicator11_admin2_month',
        )
        for name in indicator_names:
            export_indicator(hive, package, prefix, name, target_dir, name,
                             time.time())

        elapsed = format_two_point_time(started, time.time())
        print('########## FINISHED export indicator_06_11 ########## Elapsed time: {time} seconds'.format(time=elapsed))
Ejemplo n.º 21
0
def execute_multiple(cursor, package, sql_filename, sql_params, timer):
    """Execute every statement of the SQL script ``{package}/{sql_filename}.sql``.

    The script text is formatted with ``sql_params`` and split on ``;``. Each
    fragment that is not blank is printed and passed, unstripped, to
    ``cursor.execute``.

    Args:
        cursor: object whose ``execute`` method receives each statement.
        package: folder part of the script path.
        sql_filename: script base name without the ``.sql`` extension.
        sql_params: mapping of placeholder names to values for the script.
        timer: start timestamp used for the elapsed-time message.
    """
    print('Execute multiple queries...')
    script_path = '{package}/{sql_filename}.sql'.format(package=package, sql_filename=sql_filename)
    script = sql_to_string(script_path).format(**sql_params)

    for statement in script.split(";"):
        # skip the empty fragments left between and after separators
        if not statement.strip():
            continue
        print('Execute {q}'.format(q=statement))
        cursor.execute(statement)

    elapsed = format_two_point_time(timer, time.time())
    print('Finised Execute multiple queries. Elapsed time: {time} seconds'.format(time=elapsed))