Ejemplo n.º 1
0
def validation(analytics):
    sql_maxdate = 'SELECT MAX ([date]) AS "Max Date" FROM {}.dbo.{};'.format(
        DB_NAME, TABLE_NAME)
    last_insert = pd.read_sql(sql_maxdate, cnxn).iloc[0][0]

    if last_insert is None:
        ref_date = datetime.datetime.strptime('2017-06-28', '%Y-%m-%d').date()
    else:
        ref_date = last_insert + relativedelta(days=1)
        sql_lastbatch = "SELECT PK FROM {}.dbo.{}" \
                        " WHERE [date] = '{}'".format(DB_NAME, TABLE_NAME, last_insert)
        last_len_DB = len(cnxn.execute(sql_lastbatch).fetchall())
        last_len_GA = len(
            active_users.fetch_data_daily(VIEW_ID, analytics,
                                          last_insert.strftime('%Y-%m-%d'),
                                          'app'))
        if (last_len_GA - last_len_DB) > 0.001 * last_len_GA:
            doc = logger.create_log(
                'DB/GA Consistency',
                'Nack',
                hostname=socket.gethostname(),
                text='Corrupted Last Insert, truncate the last batch!',
                server_len=last_len_GA,
                database_len=last_len_DB)
            es_engine.log_into_es(es, 'textlogs-{}'.format(INDX), doc)
            sys.exit()
    return ref_date
Ejemplo n.º 2
0
def main():
    analytics = ga_engine.initialize_analyticsreporting('web')
    limit_date = datetime.datetime.now().date()
    ref_date = validation(analytics)

    for i in range((limit_date - ref_date).days - 1):
        step_time = ref_date + relativedelta(days=+i)
        year, month = jalali.Gregorian(step_time).persian_tuple()[0:2]
        custom_start = jalali.Persian(year, month, 1).gregorian_datetime()
        df_part1 = active_users.fetch_data_daily(
            VIEW_ID, analytics, step_time.strftime('%Y-%m-%d'), 'web')
        df_part1.columns = ['date', 'category', 'sessions', 'dailyUsers']
        df_part2 = active_users.fetch_data_monthly(
            VIEW_ID, analytics,
            step_time.replace(day=1).strftime('%Y-%m-%d'),
            step_time.strftime('%Y-%m-%d'), 'web')
        df_part2.columns = ['category', 'month', 'monthlyUsers']
        df_part3 = active_users.fetch_data_custom_wrapper(
            VIEW_ID, analytics, custom_start, step_time, 'monthlyUsersJalali',
            'web')
        df_part4 = active_users.fetch_data_custom_wrapper(
            VIEW_ID, analytics, step_time + relativedelta(days=-29), step_time,
            '30DaysWindow', 'web')

        df_part1['date'] = pd.to_datetime(df_part1['date'])
        total_df = df_part1.join(df_part2.set_index('category'), on='category')
        total_df = total_df.join(df_part3.set_index('category'), on='category')
        total_df = total_df.join(df_part4.set_index('category'), on='category')
        total_df.drop(['month'], axis=1, inplace=True)

        print(total_df)

        try:
            cursor.fast_executemany = True
            sql_comm = '''INSERT INTO [{}].[dbo].[{}]
            ([date],[category],[sessions],[dailyUsers],[monthlyUsers],[monthlyUsersJalali],[30DaysWindow])
             VALUES (?,?,?,?,?,?,?)'''.format(DB_NAME, TABLE_NAME)
            cursor.executemany(sql_comm, total_df.values.tolist())
            cursor.commit()
            doc = logger.create_log('Insert',
                                    'Ack',
                                    step_time,
                                    socket.gethostname(),
                                    'Successful Insert',
                                    server_len=len(total_df.index),
                                    database_len=len(total_df.index))
            es_engine.log_into_es(es, 'textlogs-{}'.format(INDX), doc)
        except Exception as e:
            doc = logger.create_log('Insert', 'Nack', step_time,
                                    socket.gethostname(), str(e))
            es_engine.log_into_es(es, 'textlogs-{}'.format(INDX), doc)

        time.sleep(2)