def query_data_frame(db_dict, sql, result=True):
    # Run the query once and wrap the rows in a DataFrame; exit if nothing comes back.
    with MySQLInstance(**db_dict, dict_result=result) as db:
        rows = db.query(sql)
        if rows:
            return DataFrame(rows)
        else:
            logger.info('No result.')
            sys.exit()
def main():
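    # Load the store master data once, then pull and merge each category's task data.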
    store_info_df = query_data_frame(mysql_db_ppzck_task, sql_get_store_info)
    data = []

    for index1 in range(8):
        sql = sql_data % (category[index1], task_new[index1], category[index1])
        logger.info(sql)
        data.append(query_data_frame(mysql_db_bi_task, sql))
        data[index1].drop_duplicates(subset='addressIDnum', inplace=True)
        store_info_df = pd.merge(
            store_info_df,
            data[index1].reindex(columns=['addressIDnum', 'rid']),
            how='inner',
            on='addressIDnum',
            sort=False,
            copy=False)

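    # Stack all category frames, attach a review URL per response, and add a per-store 'total' roll-up.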
    result = pd.concat(data)
    result['shelf_url'] = ('http://pc.ppznet.com/task_pc//shenhe/aicorrect/images.jsp?responseid='
                           + result['rid'])
    result_gb = result.groupby('addressIDnum').sum()
    result_gb.reset_index(inplace=True)
    result_gb['category'] = 'total'
    result = pd.concat([result, result_gb], sort=False)
    result['month'] = result['month'].ffill()
    store_info_df.drop(columns=store_info_df.columns[15:], inplace=True)
    result = pd.merge(store_info_df,
                      result,
                      how='inner',
                      on='addressIDnum',
                      sort=False,
                      copy=False)

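    # Replace the visualization table with the fresh result, then record today's row count.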
    engine = create_engine("mysql+pymysql://{}:{}@{}:{}/{}?charset={}".format(
        mysql_db_bi_task['username'], mysql_db_bi_task['password'],
        mysql_db_bi_task['host'], mysql_db_bi_task['port'],
        mysql_db_bi_task['schema'], 'utf8'))
    con = engine.connect()
    result.to_sql(name='t_pg_report_hsm_visualization',
                  con=con,
                  if_exists='replace',
                  index=False)
    con.close()
    with MySQLInstance(**mysql_db_bi_task, dict_result=False) as db:
        count = int(db.query(sql_get_count)[0][0])
        date = datetime.now().strftime('%Y%m%d')
        db.execute(sql_delete_count % date)
        db.execute(sql_insert_count % (date, count))
Example 3
def to_two_wave_result(i):
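    # Build this category's current- and prior-wave reports, cross-check them, email the workbook files, and persist both waves.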
    logger.info('Run PID (%s)...' % os.getpid())
    new_df, check_number_new_df = to_one_wave_result(
        rid_df_list_new[i], sku_df_list_new[i], answer_df_list_new[i], sku_verification_df_list_new[i],
        multi_option_df_list_new[i], year_new[i], i)
    old_df, check_number_old_df = to_one_wave_result(
        rid_df_list_old[i], sku_df_list_old[i], answer_df_list_old[i], sku_verification_df_list_old[i],
        multi_option_df_list_old[i], year_old[i], i)
    new_df = pd.merge(new_df, store_new_df, how='left', on='addressIDnum')
    old_df = pd.merge(old_df, store_old_df, how='left', on='addressIDnum')
    new_df = pd.merge(new_df, old_df, how='left', on='addressIDnum', suffixes=('', '_old'))
    new_df.drop_duplicates(subset='rid', inplace=True)
    new_df['check_total_shelf'] = new_df.apply(
        lambda x: check_vs_pp_total_shelf(x.total_shelf, x.total_shelf_old), axis=1)
    new_df['check_pg_shelf'] = new_df.apply(
        lambda x: check_vs_pp_pg_shelf(x.pg_shelf, x.pg_shelf_old), axis=1)
    new_df['check_pg_display'] = new_df.apply(
        lambda x: check_vs_pp_pg_display(x.total_display, x.pg_display, x.total_display_old, x.pg_display_old), axis=1)
    new_df['check_pg_sku'] = new_df.apply(
        lambda x: check_vs_pp_sku(i, x.actual_sku, x.actual_sku_old), axis=1)
    new_df['check_all'] = new_df.apply(
        lambda x: check_all(x.check_recent, x.shelf_display_verification_1, x.shelf_display_verification_2,
                            x.sku_verification_count, x.check_total_shelf, x.check_pg_shelf,
                            x.check_pg_display, x.check_pg_sku), axis=1)
    report_new_df = new_df.reindex(columns=report_order[i])
    report_old_df = old_df.reindex(columns=report_order[i])
    checkpoint_new_df = new_df.reindex(columns=checkpoint_order[i])
    checkpoint_old_df = old_df.reindex(columns=checkpoint_order[i])
    report_new_df.to_excel(new_file[i], category[i], columns=report_order[i], index=False)
    report_old_df.to_excel(old_file[i], category[i], columns=report_order[i], index=False)
    writer_checkpoint = pd.ExcelWriter(checkpoint_file[i])
    checkpoint_new_df.to_excel(writer_checkpoint, category[i] + '_new', index=False)
    checkpoint_old_df.to_excel(writer_checkpoint, category[i] + '_old', index=False)
    check_number_new_df.to_excel(writer_checkpoint, category[i] + '_number_new', index=False)
    check_number_old_df.to_excel(writer_checkpoint, category[i] + '_number_old', index=False)
    writer_checkpoint.close()
    subject = category[i] + datetime.now().strftime('%Y-%m-%d')
    contents = ['附件中为前后两月数据及需检查的数据', ]
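    # contents: "Attached are the data for the current and previous month, plus the records that need checking."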
    attachments = [new_file[i], old_file[i], checkpoint_file[i]]
    with EmailSender(**email) as email_sender:
        email_sender.send_email(to=to, subject=subject, contents=contents, attachments=attachments)
    os.remove(new_file[i])
    os.remove(old_file[i])
    os.remove(checkpoint_file[i])
    with MySQLInstance(**mysql_db_bi_task, dict_result=True) as db:
        db.executemany(sql_insert_report_new, [tuple(x) for x in new_df.reindex(columns=insert_table_order_new).values])
        db.executemany(sql_insert_report_old, [tuple(x) for x in old_df.reindex(columns=insert_table_order_old).values])
def main(index):
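    # Download every media file referenced by the recent responses into a nested folder tree.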
    logger.info('Run PID %s (%s)...' % (index, os.getpid()))
    with MySQLInstance(**mysql_db_ppzck_task, dict_result=False) as db:
        tuple_url_data = db.query(sql_get_url % (time_dict[index] * 6))
    for tup in tuple_url_data:
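        # Each tuple holds four path components (tup[0]-tup[3]), a URL prefix (tup[4]), and a ';'-separated list of media names (tup[5]).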
        for md in tup[5].split(';'):
            file_path = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), 'media_download',
                string_normalize(tup[0]), string_normalize(tup[1]),
                string_normalize(tup[2]), string_normalize(tup[3]))
            if not os.path.exists(file_path):
                os.makedirs(file_path)
            url = tup[4] + md
            response = requests.get(url)
            med = response.content
            with open(os.path.join(file_path, md.replace(':', '_')),
                      'wb') as f:
                f.write(med)
    time.sleep(1)
Example 5
def generate_sku_df(df1, df2, i):
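    # Join the raw SKU answers (df1) with store and SKU master data plus verification detail (df2), then pivot answers wide by product.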
    df = pd.merge(df1, store_info_df, how='left', on='addressIDnum', sort=False, copy=False)
    df = pd.merge(df, sku_info_df[sku_info_df['category'] == category[i]],
                  how='left', on='mark', sort=False, copy=False)
    df = pd.merge(df, df2, how='left', on=['rid', 'product_id'], sort=False, copy=False)
    df['answer_new'] = df.apply(lambda x: sku_normalization_answer(x.RD, x.status, x.hnhb), axis=1)
    df = df[df['answer_new'] != -1]
    df['fast_growing_sku_exist'] = df.apply(lambda x: fast_growing_sku_answer(x.answer_new, x.fast_growing), axis=1)
    sku_df = df.reindex(columns=['rid', 'mark', 'target_sku', 'product_name', 'answer_new'])
    sku_df = sku_df.set_index(['rid', 'mark', 'target_sku', 'product_name']).unstack()
    sku_df.columns = sku_df.columns.droplevel(0)
    sku_df.reset_index(level=[1, 2], inplace=True)
    agg_df = df.groupby('rid')[['answer_new', 'fast_growing_sku_exist']].sum()
    agg_df.rename(columns={'answer_new': 'actual_sku',
                           'fast_growing_sku_exist': 'actual_fast_growing_sku'}, inplace=True)
    agg_df['base_sku'] = agg_df['actual_sku'] - agg_df['actual_fast_growing_sku']
    sku_df = sku_df.join(agg_df)
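    # Category index 5 caps the SKU counts with numerator/denominator option rules and derives its own targets via baby_p_target_sku.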
    if i == 5:
        agg1_df = df[df['denominator_option1'] != 0].groupby(
            by=['rid', 'denominator_option1']).agg({'answer_new': np.sum, 'numerator_option1': np.max})
        agg1_df['actual_sku'] = agg1_df.apply(lambda x: min(x.answer_new, x.numerator_option1), axis=1)
        agg1_df = agg1_df['actual_sku'].unstack()
        agg1_df['actual_sku1'] = agg1_df.apply(np.sum, axis=1)
        agg2_df = df[(df['denominator_option2'] != 0) & (df['mark'] != 'H')].groupby(
            by=['rid', 'denominator_option2']).agg({'answer_new': np.sum, 'numerator_option2': np.max})
        agg2_df['actual_sku'] = agg2_df.apply(lambda x: min(x.answer_new, x.numerator_option2), axis=1)
        agg2_df = agg2_df['actual_sku'].unstack()
        agg2_df['actual_sku2'] = agg2_df.apply(np.sum, axis=1)
        agg3_df = pd.concat([agg1_df['actual_sku1'], agg2_df['actual_sku2']], axis=1, sort=False)
        agg3_df['actual_sku3'] = np.fmax(agg3_df['actual_sku1'], agg3_df['actual_sku2'])
        sku_df = sku_df.join(agg3_df)
        sku_df['actual_sku'] = sku_df['actual_sku3']
        sku_df['target_sku3'] = sku_df.apply(lambda x: baby_p_target_sku(
            x.mark, x.actual_sku1, x.actual_sku2, x.target_sku), axis=1)
        sku_df['target_sku'] = sku_df['target_sku3']
        sku_df.drop(columns=['actual_sku1', 'actual_sku2', 'actual_sku3', 'target_sku3'], inplace=True)
    sku_df['fast_growing_sku_compliance'] = np.where(
        sku_df['actual_sku'] == 0, 0, round(sku_df['actual_fast_growing_sku'] / sku_df['actual_sku'], 4))
    del sku_df['mark']
    with MySQLInstance(**mysql_db_bi_task, dict_result=True) as db:
        db.executemany(sql_insert_sku, [tuple(x) for x in df[['rid', 'product_id', 'product_name',
                                                              'answer_new', 'taskid']].values])
    return sku_df


if __name__ == '__main__':
    logger.info('Parent process %s.' % os.getpid())
    main()
    end_time = datetime.now()
    logger.info('time_consumed: %s' % (end_time-start_time))
Example 7
for each_category in category:
    new_file.append(
        os.path.join(os.path.dirname(os.path.realpath(__file__)),
                     ('pg_hsm_report_new_' + each_category +
                      datetime.now().strftime('%Y-%m-%d') + '.xlsx')))
    old_file.append(
        os.path.join(os.path.dirname(os.path.realpath(__file__)),
                     ('pg_hsm_report_old_' + each_category +
                      datetime.now().strftime('%Y-%m-%d') + '.xlsx')))
    checkpoint_file.append(
        os.path.join(os.path.dirname(os.path.realpath(__file__)),
                     ('pg_hsm_report_checkpoint_' + each_category +
                      datetime.now().strftime('%Y-%m-%d') + '.xlsx')))

sql_delete_report = """DELETE FROM t_pg_report_hsm WHERE taskid IN (%s)"""
sql_delete_sku = """DELETE FROM t_pg_report_hsm_sku_details WHERE taskid IN (%s)"""

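# Clear previously generated rows for both waves before re-inserting; category indexes 5 and 7 are skipped.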
with MySQLInstance(**mysql_db_bi_task, dict_result=True) as delete_db:
    for index1 in range(8):
        if index1 in (5, 7):
            continue
        delete_db.execute(sql_delete_report % task_new[index1])
        delete_db.execute(sql_delete_sku % task_new[index1])
        delete_db.execute(sql_delete_report % task_old[index1])
        delete_db.execute(sql_delete_sku % task_old[index1])


def query_data_frame(db_dict, sql, result=True):
    with MySQLInstance(**db_dict, dict_result=result) as db:
        rows = db.query(sql)
        if rows:
            return DataFrame(rows)
        else:
            logger.info('No result.')
            sys.exit()
Example 8
ss_time = conf.get('account', 'time')
order = conf.get('account', 'order').split()


def query_data_frame(db_dict, sql, result=True):
    with MySQLInstance(**db_dict, dict_result=result) as db:
        rows = db.query(sql)
        if rows:
            return DataFrame(rows)
        else:
            logger.info('No result.')
            sys.exit()


sql_1 = """SELECT Id from t_enterpriseuser WHERE email = '%s'"""

with MySQLInstance(**mysql_db_ppzck_task, dict_result=False) as db1:
    a = db1.query(sql_1 % company_id)

sql_2 = """SELECT id from t_task WHERE owner_id = '%s' AND create_time > '%s'"""

with MySQLInstance(**mysql_db_ppzck_task, dict_result=False) as db1:
    b = db1.query(sql_2 % (a[0][0], ss_time))
c = r"','".join([i[0] for i in b])
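# Join the task ids as id1','id2','... so that the IN ('%s') placeholder in sql_3 expands to a quoted list.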

sql_3 = """SELECT tui.*, tu.phone, tu.pid_qq, tu.email, tu.realname, tu.zfbname, tu.nickname, tu.address
FROM t_userinfo tui LEFT JOIN t_user tu ON tui.Id = tu.Id
WHERE tu.id IN (SELECT uid FROM t_response tr WHERE taskid_owner IN ('%s') GROUP BY uid)"""

d = query_data_frame(mysql_db_ppzck_task, sql_3 % c)
d.set_index('Id', inplace=True)
Example 9
def to_two_wave_result(i):
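    # Fetch both waves' SKU, answer and verification data, build the reports, email them as CSV files, and persist both waves.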
    logger.info('Run PID (%s)...' % os.getpid())
    sku_df_new = query_data_frame(
        mysql_db_ppzck_task,
        sql_get_sku % (task_new[i], time_selection_new, status_not_in_new))
    answer_df_new = query_data_frame(
        mysql_db_ppzck_task,
        sql_get_answer % (task_new[i], time_selection_new, status_not_in_new))
    sku_verification_df_new = query_data_frame(
        mysql_db_ppzck_task, sql_get_sku_verification %
        (task_new[i], time_selection_new, status_not_in_new))
    sku_df_old = query_data_frame(
        mysql_db_ppzck_task,
        sql_get_sku % (task_old[i], time_selection_old, status_not_in_old))
    answer_df_old = query_data_frame(
        mysql_db_ppzck_task,
        sql_get_answer % (task_old[i], time_selection_old, status_not_in_old))
    sku_verification_df_old = query_data_frame(
        mysql_db_ppzck_task, sql_get_sku_verification %
        (task_old[i], time_selection_old, status_not_in_old))
    sku_df_new.drop_duplicates(subset=['rid', 'product_id'], inplace=True)
    answer_df_new.drop_duplicates(subset=['rid', 'qid'], inplace=True)
    sku_df_old.drop_duplicates(subset=['rid', 'product_id'], inplace=True)
    answer_df_old.drop_duplicates(subset=['rid', 'qid'], inplace=True)
    if i in (0, 1, 2):
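        # '联合陈列_' means 'joint display_': these categories carry multi-option display questions.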
        multi_option_df_new = query_data_frame(
            mysql_db_ppzck_task, sql_get_multi_option % ('联合陈列_', task_new[i]))
        multi_option_df_new.drop_duplicates(subset=['qid', 'option_index'],
                                            inplace=True)
        multi_option_df_old = query_data_frame(
            mysql_db_ppzck_task, sql_get_multi_option % ('联合陈列_', task_old[i]))
        multi_option_df_old.drop_duplicates(subset=['qid', 'option_index'],
                                            inplace=True)
    elif i == 5:
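        # '样品展示_' means 'sample display_': category 5 uses this question prefix instead.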
        multi_option_df_new = query_data_frame(
            mysql_db_ppzck_task, sql_get_multi_option % ('样品展示_', task_new[i]))
        multi_option_df_new.drop_duplicates(subset=['qid', 'option_index'],
                                            inplace=True)
        multi_option_df_old = query_data_frame(
            mysql_db_ppzck_task, sql_get_multi_option % ('样品展示_', task_old[i]))
        multi_option_df_old.drop_duplicates(subset=['qid', 'option_index'],
                                            inplace=True)
    else:
        multi_option_df_new = DataFrame()
        multi_option_df_old = DataFrame()
    new_df, check_number_new_df = to_one_wave_result(rid_df_list_new[i],
                                                     sku_df_new, answer_df_new,
                                                     sku_verification_df_new,
                                                     multi_option_df_new,
                                                     year_new[i], i)
    old_df, check_number_old_df = to_one_wave_result(rid_df_list_old[i],
                                                     sku_df_old, answer_df_old,
                                                     sku_verification_df_old,
                                                     multi_option_df_old,
                                                     year_old[i], i)
    new_df = pd.merge(new_df, store_new_df, how='left', on='addressIDnum')
    old_df = pd.merge(old_df, store_old_df, how='left', on='addressIDnum')
    new_df = pd.merge(new_df,
                      old_df,
                      how='left',
                      on='addressIDnum',
                      suffixes=('', '_old'))
    new_df.drop_duplicates(subset='rid', inplace=True)
    new_df['check_total_shelf'] = new_df.apply(
        lambda x: check_vs_pp_total_shelf(x.total_shelf, x.total_shelf_old), axis=1)
    new_df['check_pg_shelf'] = new_df.apply(
        lambda x: check_vs_pp_pg_shelf(x.pg_shelf, x.pg_shelf_old), axis=1)
    new_df['check_pg_display'] = new_df.apply(
        lambda x: check_vs_pp_pg_display(x.total_display, x.pg_display,
                                         x.total_display_old, x.pg_display_old), axis=1)
    new_df['check_pg_sku'] = new_df.apply(
        lambda x: check_vs_pp_sku(i, x.actual_sku, x.actual_sku_old), axis=1)
    new_df['check_all'] = new_df.apply(
        lambda x: check_all(x.check_recent, x.shelf_display_verification_1,
                            x.shelf_display_verification_2, x.sku_verification_count,
                            x.check_total_shelf, x.check_pg_shelf,
                            x.check_pg_display, x.check_pg_sku), axis=1)
    report_new_df = new_df.reindex(columns=report_order[i])
    report_old_df = old_df.reindex(columns=report_order[i])
    checkpoint_new_df = new_df.reindex(columns=checkpoint_order[i])
    checkpoint_old_df = old_df.reindex(columns=checkpoint_order[i])
    report_new_df.to_csv(new_file[i], index=False, encoding='utf_8_sig')
    report_old_df.to_csv(old_file[i], index=False, encoding='utf_8_sig')
    checkpoint_new_df.to_csv(new_checkpoint_file[i],
                             index=False,
                             encoding='utf_8_sig')
    checkpoint_old_df.to_csv(old_checkpoint_file[i],
                             index=False,
                             encoding='utf_8_sig')
    check_number_new_df.to_csv(new_checkpoint_number_file[i],
                               index=False,
                               encoding='utf_8_sig')
    check_number_old_df.to_csv(old_checkpoint_number_file[i],
                               index=False,
                               encoding='utf_8_sig')
    subject = category[i] + datetime.now().strftime('%Y-%m-%d')
    contents = [
        '附件中为前后两月数据及需检查的数据',
    ]
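    # contents: "Attached are the data for the current and previous month, plus the records that need checking."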
    attachments = [
        new_file[i], old_file[i], new_checkpoint_file[i],
        old_checkpoint_file[i], new_checkpoint_number_file[i],
        old_checkpoint_number_file[i]
    ]
    with EmailSender(**email) as email_sender:
        email_sender.send_email(to=to,
                                subject=subject,
                                contents=contents,
                                attachments=attachments)
    os.remove(new_file[i])
    os.remove(old_file[i])
    os.remove(new_checkpoint_file[i])
    os.remove(old_checkpoint_file[i])
    os.remove(new_checkpoint_number_file[i])
    os.remove(old_checkpoint_number_file[i])
    with MySQLInstance(**mysql_db_bi_task, dict_result=True) as db:
        db.executemany(sql_insert_report_new, [
            tuple(x)
            for x in new_df.reindex(columns=insert_table_order_new).values
        ])
        db.executemany(sql_insert_report_old, [
            tuple(x)
            for x in old_df.reindex(columns=insert_table_order_old).values
        ])