def update_docs_info(mode):
    """Rebuild the per-document aggregate table for ``mode``.

    Every ``docs_info`` JSON payload returned by ``get_attr_info_db`` is
    converted to a DataFrame, the frames are stacked and summed per
    ``doc_path``, the target table is emptied and one row per document is
    inserted.

    Args:
        mode: ``'historic'`` or ``'lastmonth'``; selects the target table.

    Returns:
        The DataFrame aggregated per ``doc_path`` (empty when the database
        returned no payloads).

    Raises:
        ValueError: if ``mode`` is not one of the supported values.
    """
    table_names = {
        'historic': 'metrics.ganalytics_obpsorg_docs',
        'lastmonth': 'metrics.ganalytics_obpsorg_lastmonth_docs',
    }
    # Fail before touching the database instead of hitting an unbound
    # ``table_name`` after the queries have already run.
    if mode not in table_names:
        raise ValueError('Unsupported mode for docs info: %r' % (mode,))
    table_name = table_names[mode]

    num_rows = get_number_rows_db()
    docs_info_json = get_attr_info_db(num_rows, 'docs', mode)

    # The empty seed frame keeps the expected columns available even when
    # the query returned nothing, so the aggregation below never fails.
    columns = ['doc_path', 'countries', 'sessions', 'users']
    frames = [pd.DataFrame(columns=columns)]
    for row in docs_info_json:
        frames.append(convert_json_to_df(row[0]).drop(columns='index'))
    docs_info_df_agg = pd.concat(frames, ignore_index=True)

    # Aggregate once, after all payloads have been collected.
    docs_info_df_agg_per_doc = docs_info_df_agg.groupby(
        ['doc_path']).sum().reset_index()

    cursor, conn = db.connect_db()
    db.delete_db(cursor, conn, 'DELETE FROM ' + table_name + ';')

    # ``table_name`` comes from the fixed mapping above, never from input.
    query = ('INSERT INTO ' + table_name +
             ' (doc_path, countries, users, sessions)'
             ' VALUES (%(str4)s, %(long1)s, %(long2)s, %(long3)s);')
    rows = zip(docs_info_df_agg_per_doc.doc_path,
               docs_info_df_agg_per_doc.countries,
               docs_info_df_agg_per_doc.users,
               docs_info_df_agg_per_doc.sessions)
    for doc_path, countries_num, users_num, sessions_num in rows:
        # A fresh connection per row, as before; presumably db.write_db
        # closes the one it is given -- TODO confirm.
        cursor, conn = db.connect_db()
        arguments = {
            'long1': long(countries_num),
            'long2': long(users_num),
            'long3': long(sessions_num),
            'str4': doc_path.encode('utf-8'),
        }
        db.write_db(cursor, conn, query, arguments)
    return docs_info_df_agg_per_doc
# Example #2
# 0
def get_dates_from_db(table_name):
    """Return the ``date_start`` values stored in ``table_name``.

    The result of the SELECT is wrapped in a DataFrame with the single
    column ``date_start``.
    """
    db_cursor, db_conn = db_connect.connect_db()
    select_sql = ''.join(['SELECT date_start FROM ', table_name, ';'])
    rows = db_connect.query_db(db_cursor, db_conn, select_sql)
    return pd.DataFrame(rows, columns=['date_start'])
def main(date_start, date_end):
    """Collect analytics for each period in a date range and store them.

    ``DateRanges.date_ranges`` splits the range into period boundaries. For
    every pair of consecutive boundaries the analytics are fetched with
    ``analytics_obps.main`` and written to ``metrics.ganalytics_obpsorg``:
    an UPDATE when a row with that ``date_start`` already exists, an INSERT
    otherwise. Finally the historic per-country and per-document aggregate
    tables are rebuilt.

    Args:
        date_start: first day of the range; presumably a ``'YYYY-MM-DD'``
            string like the boundaries parsed below -- TODO confirm.
        date_end: end of the range, same format.
    """
    # The arguments are used as given (they were previously overwritten by
    # hard-coded debugging dates).
    dates_list = DateRanges.date_ranges(date_start, date_end)
    if len(dates_list) == 1:
        dates_list.append(date_end)

    # Periods already stored, as strings comparable with dates_list entries.
    table_name = 'metrics.ganalytics_obpsorg'
    dates_df_db = DateRanges.get_dates_from_db(table_name)
    dates_in_db = {dat.strftime('%Y-%m-%d') for dat in dates_df_db.date_start}

    for period_start, next_start in zip(dates_list[:-1], dates_list[1:]):
        # Each period ends the day before the next boundary.
        last_day = datetime.strptime(next_start,
                                     '%Y-%m-%d') - timedelta(days=1)
        period_end = datetime.strftime(last_day, '%Y-%m-%d')
        print('Evaluating the period: ' + period_start + ' to ' + next_start)

        (total_new_users, total_users, total_countries, total_sessions,
         _countries_df, pagepaths_df, _sessions_user_df, _users_df,
         countries_info, docs_info, _response, start_date,
         end_date) = analytics_obps.main(period_start, period_end)
        docs_access = pagepaths_df['doc_path'].count()
        cursor, conn = db.connect_db()

        arguments = {
            'date1': start_date,
            'date2': end_date,
            'long3': long(total_new_users),
            'long4': long(total_users),
            'long5': long(total_sessions),
            'long6': long(docs_access),
            'long7': long(total_countries),
            'json8': str(countries_info),
            'json9': str(docs_info)
        }

        if period_start in dates_in_db:
            print('The period ' + period_start + ' to ' + next_start +
                  ' alrady exists. Results will be overwritten for this period.')
            # The row key is bound as a parameter rather than spliced into
            # the SQL text.
            arguments['date_key'] = period_start
            query = (
                'UPDATE metrics.ganalytics_obpsorg SET'
                ' date_start = %(date1)s, date_end = %(date2)s,'
                ' users_num_new = %(long3)s, users_num_total = %(long4)s,'
                ' visits_num = %(long5)s, docs_access_num = %(long6)s,'
                ' countries_num = %(long7)s, countries_info = %(json8)s,'
                ' docs_info = %(json9)s'
                ' WHERE date_start = %(date_key)s;')
        else:
            query = (
                'INSERT INTO metrics.ganalytics_obpsorg'
                ' (date_start, date_end, users_num_new, users_num_total,'
                ' visits_num, docs_access_num, countries_num,'
                ' countries_info, docs_info)'
                ' VALUES (%(date1)s, %(date2)s, %(long3)s, %(long4)s,'
                ' %(long5)s, %(long6)s, %(long7)s, %(json8)s, %(json9)s);')

        db.write_db(cursor, conn, query, arguments)

    # Rebuild the aggregate tables; the returned frames are not needed here.
    AggregateInfo.update_countries_info('historic')
    AggregateInfo.update_docs_info('historic')
def update_countries_info(mode):
    """Rebuild the per-country aggregate table for ``mode``.

    Every ``countries_info`` JSON payload returned by ``get_attr_info_db``
    is converted to a DataFrame, the frames are stacked and summed per
    ``country``, the target table is emptied and one row per country is
    inserted.

    Args:
        mode: ``'historic_mainlanding'``, ``'historic'`` or ``'lastmonth'``;
            selects the target table.

    Returns:
        The DataFrame aggregated per ``country`` (empty when the database
        returned no payloads).

    Raises:
        ValueError: if ``mode`` is not one of the supported values.
    """
    table_names = {
        'historic_mainlanding': 'metrics.ganalytics_obpsystem_countries',
        'historic': 'metrics.ganalytics_obpsorg_countries',
        'lastmonth': 'metrics.ganalytics_obpsorg_lastmonth_countries',
    }
    # Fail before touching the database instead of hitting an unbound
    # ``table_name`` after the queries have already run.
    if mode not in table_names:
        raise ValueError('Unsupported mode for countries info: %r' % (mode,))
    table_name = table_names[mode]

    num_rows = get_number_rows_db()
    countries_info_json = get_attr_info_db(num_rows, 'countries', mode)

    # The empty seed frame keeps the expected columns available even when
    # the query returned nothing, so the aggregation below never fails.
    columns = ['country', 'sessions', 'users']
    frames = [pd.DataFrame(columns=columns)]
    for row in countries_info_json:
        frames.append(convert_json_to_df(row[0]).drop(columns='index'))
    countries_info_df_agg = pd.concat(frames, ignore_index=True)

    # Aggregate once, after all payloads have been collected.
    countries_info_df_agg_per_country = countries_info_df_agg.groupby(
        ['country']).sum().reset_index()

    cursor, conn = db.connect_db()
    db.delete_db(cursor, conn, 'DELETE FROM ' + table_name + ';')

    # ``table_name`` comes from the fixed mapping above, never from input.
    query = ('INSERT INTO ' + table_name + ' (country, users, sessions)'
             ' VALUES (%(str1)s, %(long2)s, %(long3)s);')
    rows = zip(countries_info_df_agg_per_country.country,
               countries_info_df_agg_per_country.users,
               countries_info_df_agg_per_country.sessions)
    for country_name, users_num, sessions_num in rows:
        # A fresh connection per row, as before; presumably db.write_db
        # closes the one it is given -- TODO confirm.
        cursor, conn = db.connect_db()
        arguments = {
            'str1': country_name.encode('utf-8'),
            'long2': long(users_num),
            'long3': long(sessions_num),
        }
        db.write_db(cursor, conn, query, arguments)
    return countries_info_df_agg_per_country
def get_attr_info_db(num_rows, attr, mode):
    """Fetch the stored JSON info column for an attribute and mode.

    Args:
        num_rows: maximum number of rows to read (used as the SQL LIMIT).
        attr: ``'countries'`` or ``'docs'``; selects the ``*_info`` column.
        mode: ``'historic'`` or ``'lastmonth'`` for both attributes, plus
            ``'historic_mainlanding'`` for ``'countries'``; selects the table.

    Returns:
        Whatever ``db.query_db`` returns for the SELECT.

    Raises:
        ValueError: if the ``attr``/``mode`` combination is not supported.
    """
    tables = {
        ('countries', 'historic_mainlanding'): 'ganalytics_obpsystem',
        ('countries', 'historic'): 'ganalytics_obpsorg',
        ('countries', 'lastmonth'): 'ganalytics_obpsorg_lastmonth',
        ('docs', 'historic'): 'ganalytics_obpsorg',
        ('docs', 'lastmonth'): 'ganalytics_obpsorg_lastmonth',
    }
    info_columns = {'countries': 'countries_info', 'docs': 'docs_info'}

    # Validate before opening a connection so a bad call neither leaks a
    # connection nor fails later on an unbound local.
    key = (attr, mode)
    if key not in tables:
        raise ValueError('Unsupported attr/mode combination: %r / %r' %
                         (attr, mode))
    table = tables[key]
    column = table + '.' + info_columns[attr]

    # Identifiers come from the fixed mappings above; the limit is coerced
    # to an integer so nothing but a number is ever spliced into the SQL.
    query = ('SELECT ' + column + ' FROM metrics.' + table + ' LIMIT ' +
             str(int(num_rows)) + ';')
    cursor, conn = db.connect_db()
    return db.query_db(cursor, conn, query)
def get_number_rows_db():
    """Return the row count of ``metrics.ganalytics_obpsorg`` as an int."""
    db_cursor, db_conn = db.connect_db()
    result = db.query_db(
        db_cursor, db_conn,
        'SELECT COUNT (*) FROM metrics.ganalytics_obpsorg;')
    # The count is the first column of the first returned row.
    first_row = result[0]
    return int(first_row[0])
def _first_value_as_int(rows):
    """Return the first column of the first row of a query result as int."""
    return int(rows[0][0])


def _ctd_correction_statement(deployment_id, user_id, today, exists,
                              cond_corr_atts, salt_corr_atts):
    """Build the (query, arguments) pair storing a CTD salinity correction."""
    salt_01 = salt_corr_atts['salt_01_corr_atts']
    # NOTE(review): the mean/std residuals of sensor 01 are picked by key
    # position (3rd and 5th key), which depends on the mapping's ordering;
    # sensor 02 below uses explicit key names -- confirm and align.
    salt_01_keys = list(salt_01)
    arguments = {
        'int1': deployment_id,
        'float1':
        cond_corr_atts['cond_01_corr_atts']['correction_coefficient_A'],
        'float2': salt_01[salt_01_keys[2]],
        'float3': salt_01[salt_01_keys[4]],
        'date1': today,
        'int2': user_id,
        'date2': today,
        'int3': user_id,
    }
    sensor_columns = [
        ('ctd_salinity_correction_sensor_01_corr_coeff', '%(float1)s'),
        ('ctd_salinity_correction_sensor_01_mean_resid', '%(float2)s'),
        ('ctd_salinity_correction_sensor_01_std_resid', '%(float3)s'),
    ]

    sensor_count = len(cond_corr_atts)
    if sensor_count == 2:
        salt_02 = salt_corr_atts['salt_02_corr_atts']
        arguments['float4'] = cond_corr_atts['cond_02_corr_atts'][
            'correction_coefficient_B']
        arguments['float5'] = salt_02['residual_salinity_differences_mean']
        arguments['float6'] = salt_02['residual_salinity_differences_std']
        sensor_columns += [
            ('ctd_salinity_correction_sensor_02_corr_coeff', '%(float4)s'),
            ('ctd_salinity_correction_sensor_02_mean_resid', '%(float5)s'),
            ('ctd_salinity_correction_sensor_02_std_resid', '%(float6)s'),
        ]

    if exists:
        # Only one or two sensors have an UPDATE defined; anything else used
        # to fall through and send a malformed statement to the database.
        if sensor_count not in (1, 2):
            raise ValueError(
                'Expected 1 or 2 conductivity sensors, got %d' % sensor_count)
        assignments = ['%s = %s' % pair for pair in sensor_columns]
        assignments += ['updated_on = %(date2)s', 'updated_by_id = %(int3)s']
        query = ('UPDATE corrections.ctd_salinity_correction SET ' +
                 ', '.join(assignments) +
                 ' WHERE ctd_salinity_correction_deployment_id = %(int1)s;')
    else:
        # The sensor-02 values are inserted too when present, so a first
        # export of a dual-sensor deployment no longer drops them.
        columns = (['ctd_salinity_correction_deployment_id'] +
                   [name for name, _ in sensor_columns] +
                   ['created_on', 'created_by_id',
                    'updated_on', 'updated_by_id'])
        values = (['%(int1)s'] +
                  [placeholder for _, placeholder in sensor_columns] +
                  ['%(date1)s', '%(int2)s', '%(date2)s', '%(int3)s'])
        query = ('INSERT INTO corrections.ctd_salinity_correction (' +
                 ', '.join(columns) + ') VALUES (' + ', '.join(values) + ');')
    return query, arguments


def _glider_correction_statement(deployment_id, user_id, today, exists,
                                 cond_corr_atts, salt_corr_atts):
    """Build the (query, arguments) pair storing a glider salinity correction.

    Looks up the id of the CTD salinity correction used as background data.
    """
    cond_01 = cond_corr_atts['cond_01_corr_atts']
    salt_01 = salt_corr_atts['salt_01_corr_atts']

    # The background-data name carries the CTD deployment code right after
    # 'dep' and the instrument name as the third '_'-separated field.
    ctd_deployment_info = salt_01['background_data_used_for_correction']
    ctd_deployment_info = ctd_deployment_info.split('dep', 1)[1].split('_')

    # NOTE(review): these lookups splice values into the SQL text because
    # db_connect.query_db is only ever called with a ready-made statement
    # here; bind them as parameters if the helper supports it.
    query = (
        'SELECT deployment_id FROM instrumentation.deployment'
        ' WHERE deployment_code = \'' + str(ctd_deployment_info[0]) + '\''
        ' AND deployment_instrument_id = (SELECT instrument_id'
        ' FROM instrumentation.instrument WHERE instrument_name = \'' +
        str(ctd_deployment_info[2].upper()) + '\');')
    cursor, conn = db_connect.connect_db()
    ctd_deployment_id = _first_value_as_int(
        db_connect.query_db(cursor, conn, query))

    query = (
        'SELECT ctd_salinity_correction_id'
        ' FROM corrections.ctd_salinity_correction'
        ' WHERE ctd_salinity_correction_deployment_id = \'' +
        str(ctd_deployment_id) + '\';')
    cursor, conn = db_connect.connect_db()
    ctd_corr_deployment_id = _first_value_as_int(
        db_connect.query_db(cursor, conn, query))

    arguments = {
        'int1': deployment_id,
        'float1': cond_01['correction_coefficient_A'],
        'text1': salt_01['residual_salinity_differences_std_background_data'],
        'float3': salt_01['salinity_error_estimate'],
        'int2': ctd_corr_deployment_id,
        'text2': cond_01['theta-sal_whitespace_for_correction'],
        'date1': today,
        'int3': user_id,
        'date2': today,
        'int4': user_id,
    }

    if exists:
        query = (
            'UPDATE corrections.glider_salinity_correction SET'
            ' glider_salinity_correction_sensor_01_corr_coeff = %(float1)s,'
            ' glider_salinity_correction_residual_salinity_differences'
            ' = %(text1)s,'
            ' glider_salinity_correction_salinity_error_estimate = %(float3)s,'
            ' glider_salinity_correction_background_data_id = %(int2)s,'
            ' glider_salinity_correction_theta_sal_range = %(text2)s,'
            ' updated_on = %(date2)s, updated_by_id = %(int3)s'
            ' WHERE glider_salinity_correction_deployment_id = %(int1)s;')
    else:
        query = (
            'INSERT INTO corrections.glider_salinity_correction'
            ' (glider_salinity_correction_deployment_id,'
            ' glider_salinity_correction_sensor_01_corr_coeff,'
            ' glider_salinity_correction_residual_salinity_differences,'
            ' glider_salinity_correction_salinity_error_estimate,'
            ' glider_salinity_correction_background_data_id,'
            ' glider_salinity_correction_theta_sal_range,'
            ' created_on, created_by_id, updated_on, updated_by_id)'
            ' VALUES (%(int1)s, %(float1)s, %(text1)s, %(float3)s, %(int2)s,'
            ' %(text2)s, %(date1)s, %(int3)s, %(date2)s, %(int4)s);')
    return query, arguments


def export_corrections_to_db(cond_corr_atts, salt_corr_atts,
                             metadata_file_info):
    """Store the salinity-correction metadata of a deployment.

    The deployment id is looked up from ``metadata_file_info``; the matching
    row of the CTD or glider correction table is updated when one already
    exists for that deployment and inserted otherwise.

    Args:
        cond_corr_atts: mapping with a ``'cond_01_corr_atts'`` entry (and
            ``'cond_02_corr_atts'`` for a second CTD sensor).
        salt_corr_atts: mapping with the matching ``'salt_0N_corr_atts'``
            entries.
        metadata_file_info: mapping providing ``'platform_subtype'``
            (``'ctd'`` or ``'glider'``), ``'deployment_code'`` and
            ``'instrument_name'``.

    Raises:
        ValueError: if ``platform_subtype`` is unsupported, or an existing
            CTD row has to be updated for a sensor count other than 1 or 2.
    """
    platform_subtype = metadata_file_info['platform_subtype']
    if platform_subtype == 'ctd':
        existing_query = ('SELECT ctd_salinity_correction_deployment_id '
                          'FROM corrections.ctd_salinity_correction;')
        build_statement = _ctd_correction_statement
    elif platform_subtype == 'glider':
        existing_query = ('SELECT glider_salinity_correction_deployment_id '
                          'FROM corrections.glider_salinity_correction;')
        build_statement = _glider_correction_statement
    else:
        # Reject unknown platforms before any database access.
        raise ValueError('Unsupported platform_subtype: %r' %
                         (platform_subtype,))

    ## Read deployment id from database
    # NOTE(review): values are spliced into the SQL text because
    # db_connect.query_db is only ever called with a ready-made statement
    # here; bind them as parameters if the helper supports it.
    query = (
        'SELECT deployment_id FROM instrumentation.deployment'
        ' WHERE deployment_code = \'' +
        metadata_file_info['deployment_code'][3:7] + '\''
        ' AND deployment_instrument_id = (SELECT instrument_id'
        ' FROM instrumentation.instrument WHERE instrument_name = \'' +
        metadata_file_info['instrument_name'].upper() + '\');')
    cursor, conn = db_connect.connect_db()
    deployment_id = _first_value_as_int(
        db_connect.query_db(cursor, conn, query))

    user_id = 1

    ## Check existing corrections done in database
    cursor, conn = db_connect.connect_db()
    deployment_id_query = db_connect.query_db(cursor, conn, existing_query)
    deployment_id_list = pd.DataFrame(deployment_id_query,
                                      columns=['deployment_id'])
    exists = deployment_id in deployment_id_list.deployment_id.values

    ## Write correction metadata into database
    today = datetime.now().date().strftime('%Y-%m-%d')
    query, arguments = build_statement(deployment_id, user_id, today, exists,
                                       cond_corr_atts, salt_corr_atts)
    cursor, conn = db_connect.connect_db()
    db_connect.write_db(cursor, conn, query, arguments)