def update_docs_info(mode):
    """Rebuild the per-document aggregate table for the given mode.

    Reads every per-period ``docs_info`` JSON blob stored in
    metrics.ganalytics_obpsorg, aggregates countries/sessions/users per
    document path, wipes the target table and re-inserts one row per
    document.

    :param mode: 'historic' or 'lastmonth' -- selects the target table.
    :returns: aggregated pandas DataFrame, one row per ``doc_path``.
    :raises ValueError: if *mode* is not a recognised value.
    """
    if mode == 'historic':
        table_name = 'metrics.ganalytics_obpsorg_docs'
    elif mode == 'lastmonth':
        table_name = 'metrics.ganalytics_obpsorg_lastmonth_docs'
    else:
        # FIX: an unknown mode previously crashed later with a NameError
        # on table_name; fail fast with an explicit error instead.
        raise ValueError('unknown mode: %r' % (mode,))

    num_rows = get_number_rows_db()
    columns = ['doc_path', 'countries', 'sessions', 'users']
    docs_info_df_agg = pd.DataFrame(columns=columns)
    docs_info_json = get_attr_info_db(num_rows, 'docs', mode)
    for i in range(0, len(docs_info_json)):
        docs_info_df = convert_json_to_df(docs_info_json[i][0])
        docs_info_df = docs_info_df.drop(columns='index')
        docs_info_df_agg = docs_info_df_agg.append(docs_info_df,
                                                   ignore_index=True)

    # Collapse all periods into one row per document path.
    docs_info_df_agg_per_doc = docs_info_df_agg.groupby(
        ['doc_path']).sum().reset_index()

    # Rebuild the table from scratch: delete everything, then re-insert.
    cursor, conn = db.connect_db()
    query = 'DELETE FROM ' + table_name + ';'
    db.delete_db(cursor, conn, query)
    for i in range(0, len(docs_info_df_agg_per_doc)):
        doc_path = docs_info_df_agg_per_doc.doc_path.iloc[i].encode('utf-8')
        countries_num = docs_info_df_agg_per_doc.countries.iloc[i]
        users_num = docs_info_df_agg_per_doc.users.iloc[i]
        # FIX: sessions was round-tripped through str() before long();
        # convert the numeric value directly.
        sessions_num = docs_info_df_agg_per_doc.sessions.iloc[i]
        cursor, conn = db.connect_db()
        # Values go in as bind parameters; only the (internal, whitelisted
        # above) table name is concatenated into the statement.
        arguments = {
            'long1': long(countries_num),
            'long2': long(users_num),
            'long3': long(sessions_num),
            'str4': doc_path
        }
        query = ('INSERT INTO ' + table_name +
                 ' (doc_path, countries, users, sessions)'
                 ' VALUES (%(str4)s, %(long1)s, %(long2)s, %(long3)s);')
        db.write_db(cursor, conn, query, arguments)
    return docs_info_df_agg_per_doc
def get_dates_from_db(table_name):
    """Return the start dates already stored in *table_name*.

    :param table_name: fully-qualified table to read from.
    :returns: single-column DataFrame ('date_start'), one row per period
        already present in the table.
    """
    sql = 'SELECT date_start FROM ' + table_name + ';'
    cursor, conn = db_connect.connect_db()
    rows = db_connect.query_db(cursor, conn, sql)
    return pd.DataFrame(rows, columns=['date_start'])
def main(date_start, date_end): #set list of dates to process date_start = '2020-06-01' date_end = '2020-07-01' dates_list = DateRanges.date_ranges(date_start, date_end) if len(dates_list) == 1: dates_list.append(date_end) #check already existing dates into db table_name = 'metrics.ganalytics_obpsorg' dates_df_db = DateRanges.get_dates_from_db(table_name) dates_list_db = [] for dat in dates_df_db.date_start: dates_list_db.append(dat.strftime('%Y-%m-%d')) for i in range(0, len(dates_list) - 1): dum_date_end = datetime.strptime(dates_list[i + 1], '%Y-%m-%d') - timedelta(days=1) date_end = datetime.strftime(dum_date_end, '%Y-%m-%d') print 'Evaluating the period: ' + dates_list[i] + ' to ' + dates_list[ i + 1] total_new_users, total_users, total_countries, total_sessions, countries_df, pagepaths_df, sessions_user_df, users_df, countries_info, docs_info, response, start_date, end_date = analytics_obps.main( dates_list[i], date_end) docs_access = pagepaths_df['doc_path'].count() cursor, conn = db.connect_db() arguments = { 'date1': start_date, 'date2': end_date, 'long3': long(total_new_users), 'long4': long(total_users), 'long5': long(total_sessions), 'long6': long(docs_access), 'long7': long(total_countries), 'json8': str(countries_info), 'json9': str(docs_info) } if dates_list[i] in dates_list_db: print 'The period ' + dates_list[i] + ' to ' + dates_list[ i + 1] + ' alrady exists. Results will be overwritten for this period.' 
query = 'UPDATE metrics.ganalytics_obpsorg SET date_start = %(date1)s, date_end = %(date2)s, users_num_new = %(long3)s, users_num_total = %(long4)s, visits_num = %(long5)s, docs_access_num = %(long6)s, countries_num = %(long7)s, countries_info = %(json8)s, docs_info = %(json9)s WHERE date_start = \'' + dates_list[ i] + '\';' elif dates_list[i] not in dates_list_db: query = '''INSERT INTO metrics.ganalytics_obpsorg (date_start, date_end, users_num_new, users_num_total, visits_num, docs_access_num, countries_num, countries_info, docs_info) VALUES (%(date1)s, %(date2)s,%(long3)s,%(long4)s,%(long5)s,%(long6)s,%(long7)s,%(json8)s, %(json9)s);''' db.write_db(cursor, conn, query, arguments) countries_info_df_agg_per_country = AggregateInfo.update_countries_info( 'historic') docs_info_df_agg_per_doc = AggregateInfo.update_docs_info('historic')
def update_countries_info(mode):
    """Rebuild the per-country aggregate table for the given mode.

    Reads every per-period ``countries_info`` JSON blob from the
    database, aggregates sessions/users per country, wipes the target
    table and re-inserts one row per country.

    :param mode: 'historic_mainlanding', 'historic' or 'lastmonth' --
        selects the target table.
    :returns: aggregated pandas DataFrame, one row per ``country``.
    :raises ValueError: if *mode* is not a recognised value.
    """
    if mode == 'historic_mainlanding':
        table_name = 'metrics.ganalytics_obpsystem_countries'
    elif mode == 'historic':
        table_name = 'metrics.ganalytics_obpsorg_countries'
    elif mode == 'lastmonth':
        table_name = 'metrics.ganalytics_obpsorg_lastmonth_countries'
    else:
        # FIX: an unknown mode previously crashed later with a NameError
        # on table_name; fail fast with an explicit error instead.
        raise ValueError('unknown mode: %r' % (mode,))

    num_rows = get_number_rows_db()
    columns = ['country', 'sessions', 'users']
    countries_info_df_agg = pd.DataFrame(columns=columns)
    countries_info_json = get_attr_info_db(num_rows, 'countries', mode)
    for i in range(0, len(countries_info_json)):
        countries_info_df = convert_json_to_df(countries_info_json[i][0])
        countries_info_df = countries_info_df.drop(columns='index')
        countries_info_df_agg = countries_info_df_agg.append(
            countries_info_df, ignore_index=True)

    # Collapse all periods into one row per country.
    countries_info_df_agg_per_country = countries_info_df_agg.groupby(
        ['country']).sum().reset_index()

    # Rebuild the table from scratch: delete everything, then re-insert.
    cursor, conn = db.connect_db()
    query = 'DELETE FROM ' + table_name + ';'
    db.delete_db(cursor, conn, query)
    for i in range(0, len(countries_info_df_agg_per_country)):
        country_name = countries_info_df_agg_per_country.country.iloc[
            i].encode('utf-8')
        users_num = countries_info_df_agg_per_country.users.iloc[i]
        # FIX: sessions was round-tripped through str() before long();
        # convert the numeric value directly.
        sessions_num = countries_info_df_agg_per_country.sessions.iloc[i]
        cursor, conn = db.connect_db()
        arguments = {
            'str1': country_name,
            'long2': long(users_num),
            'long3': long(sessions_num)
        }
        query = ('INSERT INTO ' + table_name +
                 ' (country, users, sessions)'
                 ' VALUES (%(str1)s, %(long2)s,%(long3)s);')
        db.write_db(cursor, conn, query, arguments)
    return countries_info_df_agg_per_country
def get_attr_info_db(num_rows, attr, mode):
    """Fetch up to *num_rows* raw JSON blobs for one analytics column.

    :param num_rows: LIMIT applied to the SELECT.
    :param attr: 'countries' or 'docs' -- which info column to read.
    :param mode: 'historic_mainlanding' (countries only), 'historic' or
        'lastmonth' -- which source table to read.
    :returns: raw query result (sequence of 1-tuples of JSON text).
    :raises KeyError: for an unknown (attr, mode) combination; the
        original code crashed later with a NameError instead.
    """
    # FIX: the old if/if/elif chains left ``table``/``column`` undefined
    # for unknown inputs; an explicit whitelist fails fast and keeps the
    # concatenated identifiers safe.
    tables = {
        ('countries', 'historic_mainlanding'): 'ganalytics_obpsystem',
        ('countries', 'historic'): 'ganalytics_obpsorg',
        ('countries', 'lastmonth'): 'ganalytics_obpsorg_lastmonth',
        ('docs', 'historic'): 'ganalytics_obpsorg',
        ('docs', 'lastmonth'): 'ganalytics_obpsorg_lastmonth',
    }
    info_columns = {'countries': 'countries_info', 'docs': 'docs_info'}
    table = tables[(attr, mode)]
    column = table + '.' + info_columns[attr]

    cursor, conn = db.connect_db()
    query = ('SELECT ' + column + ' FROM metrics.' + table +
             ' LIMIT ' + str(num_rows) + ';')
    attr_info_json = db.query_db(cursor, conn, query)
    return attr_info_json
def get_number_rows_db():
    """Return the current row count of metrics.ganalytics_obpsorg."""
    sql = 'SELECT COUNT (*) FROM metrics.ganalytics_obpsorg;'
    cursor, conn = db.connect_db()
    result = db.query_db(cursor, conn, sql)
    return int(result[0][0])
def export_corrections_to_db(cond_corr_atts, salt_corr_atts,
                             metadata_file_info):
    """Write salinity-correction metadata to the corrections schema.

    Resolves the deployment id for the processed dataset, then INSERTs a
    new correction row or UPDATEs the existing one depending on whether
    the deployment already has a stored correction.  CTD and glider
    deployments use different tables/columns.

    :param cond_corr_atts: conductivity-correction attributes keyed
        'cond_01_corr_atts' (plus 'cond_02_corr_atts' for 2-sensor CTDs).
    :param salt_corr_atts: salinity-correction attributes keyed
        'salt_01_corr_atts' (plus 'salt_02_corr_atts').
    :param metadata_file_info: dict with at least 'deployment_code',
        'instrument_name' and 'platform_subtype' ('ctd' or 'glider').
    :raises ValueError: for an unknown platform_subtype (previously a
        NameError on ``query`` much later).
    """
    platform = metadata_file_info['platform_subtype']
    if platform not in ('ctd', 'glider'):
        raise ValueError('unknown platform_subtype: %r' % (platform,))

    ## Resolve the deployment id from deployment code + instrument name.
    query = [
        'SELECT deployment_id FROM instrumentation.deployment WHERE deployment_code = '
        + '\'' + metadata_file_info['deployment_code'][3:7] + '\''
        + ' AND deployment_instrument_id = (SELECT instrument_id FROM instrumentation.instrument WHERE instrument_name = '
        + '\'' + metadata_file_info['instrument_name'].upper() + '\'' + ');'
    ]
    cursor, conn = db_connect.connect_db()
    deployment_id = db_connect.query_db(cursor, conn, query[0])
    # query_db returns a sequence of row tuples; unwrap to a plain int.
    deployment_id = int(list(deployment_id[0])[0])
    user_id = 1

    ## Deployment ids that already have a correction stored.
    if platform == 'ctd':
        query = 'SELECT ctd_salinity_correction_deployment_id FROM corrections.ctd_salinity_correction;'
    elif platform == 'glider':
        query = 'SELECT glider_salinity_correction_deployment_id FROM corrections.glider_salinity_correction;'
    cursor, conn = db_connect.connect_db()
    deployment_id_query = db_connect.query_db(cursor, conn, query)
    deployment_id_list = pd.DataFrame(deployment_id_query,
                                      columns=['deployment_id'])

    ## CTD
    if platform == 'ctd':
        salt_01 = salt_corr_atts['salt_01_corr_atts']
        arguments = {
            'int1': deployment_id,
            'float1':
            cond_corr_atts['cond_01_corr_atts']['correction_coefficient_A'],
            # FIX: sensor-01 stats were read positionally via
            # list(dict)[2] / [4], which depends on dict ordering; use the
            # same named keys as the sensor-02 path below.
            # NOTE(review): confirm these key names against the producer.
            'float2': salt_01['residual_salinity_differences_mean'],
            'float3': salt_01['residual_salinity_differences_std'],
            'date1': datetime.now().date().strftime('%Y-%m-%d'),
            'int2': user_id,
            'date2': datetime.now().date().strftime('%Y-%m-%d'),
            'int3': user_id
        }
        if len(cond_corr_atts) == 2:
            # Second conductivity sensor present.
            arguments['float4'] = cond_corr_atts['cond_02_corr_atts'][
                'correction_coefficient_B']
            arguments['float5'] = salt_corr_atts['salt_02_corr_atts'][
                'residual_salinity_differences_mean']
            arguments['float6'] = salt_corr_atts['salt_02_corr_atts'][
                'residual_salinity_differences_std']
        if deployment_id in deployment_id_list.deployment_id.values:
            if len(cond_corr_atts) == 1:
                query = [
                    'UPDATE corrections.ctd_salinity_correction SET '
                    + 'ctd_salinity_correction_sensor_01_corr_coeff = %(float1)s, '
                    + 'ctd_salinity_correction_sensor_01_mean_resid = %(float2)s, '
                    + 'ctd_salinity_correction_sensor_01_std_resid = %(float3)s, '
                    + 'updated_on = %(date2)s, '
                    + 'updated_by_id = %(int3)s '
                    + 'WHERE ctd_salinity_correction_deployment_id = \''
                    + str(deployment_id) + '\';'
                ]
            elif len(cond_corr_atts) == 2:
                query = [
                    'UPDATE corrections.ctd_salinity_correction SET '
                    + 'ctd_salinity_correction_sensor_01_corr_coeff = %(float1)s, '
                    + 'ctd_salinity_correction_sensor_01_mean_resid = %(float2)s, '
                    + 'ctd_salinity_correction_sensor_01_std_resid = %(float3)s, '
                    + 'ctd_salinity_correction_sensor_02_corr_coeff = %(float4)s, '
                    + 'ctd_salinity_correction_sensor_02_mean_resid = %(float5)s, '
                    + 'ctd_salinity_correction_sensor_02_std_resid = %(float6)s, '
                    + 'updated_on = %(date2)s, '
                    + 'updated_by_id = %(int3)s '
                    + 'WHERE ctd_salinity_correction_deployment_id = \''
                    + str(deployment_id) + '\';'
                ]
        else:
            # FIX: the original guard was ``elif ... not in ... or
            # deployment_id_list.empty()``; DataFrame.empty is a property,
            # so calling it would raise TypeError, and the first operand
            # already covers the empty case -- plain ``else`` is correct.
            # NOTE(review): this INSERT stores only sensor-01 columns even
            # when a second sensor was corrected -- confirm intent.
            query = [
                'INSERT INTO corrections.ctd_salinity_correction'
                + '(ctd_salinity_correction_deployment_id, ctd_salinity_correction_sensor_01_corr_coeff, ctd_salinity_correction_sensor_01_mean_resid,'
                + 'ctd_salinity_correction_sensor_01_std_resid, created_on, created_by_id, updated_on, updated_by_id)'
                + 'VALUES (%(int1)s, %(float1)s, %(float2)s, %(float3)s, %(date1)s, %(int2)s, %(date2)s, %(int3)s);'
            ]

    ## Glider
    elif platform == 'glider':
        # The reference CTD deployment is encoded in the background-data
        # attribute string: '...dep<code>_<...>_<instrument>...'.
        ctd_deployment_info = salt_corr_atts['salt_01_corr_atts'][
            'background_data_used_for_correction']
        ctd_deployment_info = ctd_deployment_info.split('dep', 1)[1]
        ctd_deployment_info = ctd_deployment_info.split('_')

        # Resolve the CTD deployment id from the database.
        query = [
            'SELECT deployment_id FROM instrumentation.deployment WHERE deployment_code = '
            + '\'' + str(ctd_deployment_info[0]) + '\''
            + ' AND deployment_instrument_id = (SELECT instrument_id FROM instrumentation.instrument WHERE instrument_name = '
            + '\'' + str(ctd_deployment_info[2].upper()) + '\'' + ');'
        ]
        cursor, conn = db_connect.connect_db()
        ctd_deployment_id = db_connect.query_db(cursor, conn, query[0])
        ctd_deployment_id = int(list(ctd_deployment_id[0])[0])

        # ...and the id of the stored CTD salinity correction.
        query = [
            'SELECT ctd_salinity_correction_id FROM corrections.ctd_salinity_correction WHERE ctd_salinity_correction_deployment_id = '
            + '\'' + str(ctd_deployment_id) + '\'' + ';'
        ]
        cursor, conn = db_connect.connect_db()
        ctd_corr_deployment_id = db_connect.query_db(cursor, conn, query[0])
        ctd_corr_deployment_id = int(list(ctd_corr_deployment_id[0])[0])

        ## Correction metadata for INSERT/UPDATE.
        arguments = {
            'int1': deployment_id,
            'float1':
            cond_corr_atts['cond_01_corr_atts']['correction_coefficient_A'],
            'text1': salt_corr_atts['salt_01_corr_atts'][
                'residual_salinity_differences_std_background_data'],
            'float3':
            salt_corr_atts['salt_01_corr_atts']['salinity_error_estimate'],
            'int2': ctd_corr_deployment_id,
            'text2': cond_corr_atts['cond_01_corr_atts'][
                'theta-sal_whitespace_for_correction'],
            'date1': datetime.now().date().strftime('%Y-%m-%d'),
            'int3': user_id,
            'date2': datetime.now().date().strftime('%Y-%m-%d'),
            'int4': user_id
        }
        if deployment_id in deployment_id_list.deployment_id.values:
            query = [
                'UPDATE corrections.glider_salinity_correction SET '
                + 'glider_salinity_correction_sensor_01_corr_coeff = %(float1)s, '
                + 'glider_salinity_correction_residual_salinity_differences = %(text1)s, '
                + 'glider_salinity_correction_salinity_error_estimate = %(float3)s, '
                + 'glider_salinity_correction_background_data_id = %(int2)s, '
                + 'glider_salinity_correction_theta_sal_range = %(text2)s, '
                + 'updated_on = %(date2)s, '
                + 'updated_by_id = %(int3)s '
                + 'WHERE glider_salinity_correction_deployment_id = \''
                + str(deployment_id) + '\';'
            ]
        else:
            # FIX: same broken ``or deployment_id_list.empty()`` guard as
            # in the CTD branch; plain ``else`` is equivalent and safe.
            query = [
                'INSERT INTO corrections.glider_salinity_correction '
                + '(glider_salinity_correction_deployment_id, glider_salinity_correction_sensor_01_corr_coeff, glider_salinity_correction_residual_salinity_differences,'
                + 'glider_salinity_correction_salinity_error_estimate, glider_salinity_correction_background_data_id,'
                + 'glider_salinity_correction_theta_sal_range,'
                + 'created_on, created_by_id, updated_on, updated_by_id) '
                + 'VALUES (%(int1)s, %(float1)s, %(text1)s, %(float3)s, %(int2)s, %(text2)s, %(date1)s, %(int3)s, %(date2)s, %(int4)s);'
            ]

    cursor, conn = db_connect.connect_db()
    db_connect.write_db(cursor, conn, query[0], arguments)