def query_data_frame(db_dict, sql, result=True):
    with MySQLInstance(**db_dict, dict_result=result) as db:
        query_result = db.query(sql)  # run the query once and reuse the rows
        if query_result:
            return DataFrame(query_result)
        logger.info('No result.')
        sys.exit()
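# The helper above relies on a MySQLInstance context manager defined
# elsewhere in this project. What follows is only an illustrative sketch of
# the interface these scripts use (query/execute/executemany, a dict_result
# flag, and **db_dict keys matching the create_engine() call below); it is
# an assumption built on pymysql, not the project's actual implementation.
import pymysql


class MySQLInstance:
    def __init__(self, host, port, username, password, schema,
                 dict_result=False, **kwargs):
        cursorclass = (pymysql.cursors.DictCursor if dict_result
                       else pymysql.cursors.Cursor)
        self._conn_kwargs = dict(host=host, port=int(port), user=username,
                                 password=password, database=schema,
                                 charset='utf8', cursorclass=cursorclass)

    def __enter__(self):
        self._conn = pymysql.connect(**self._conn_kwargs)
        self._cursor = self._conn.cursor()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._cursor.close()
        self._conn.close()

    def query(self, sql):
        self._cursor.execute(sql)
        return self._cursor.fetchall()

    def execute(self, sql):
        self._cursor.execute(sql)
        self._conn.commit()

    def executemany(self, sql, rows):
        self._cursor.executemany(sql, rows)
        self._conn.commit()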
def main():
    store_info_df = query_data_frame(mysql_db_ppzck_task, sql_get_store_info)
    data = []
    for index1 in range(8):
        logger.info(sql_data % (category[index1], task_new[index1],
                                category[index1]))
        data.append(query_data_frame(
            mysql_db_bi_task,
            sql_data % (category[index1], task_new[index1], category[index1])))
        data[index1].drop_duplicates(subset='addressIDnum', inplace=True)
        store_info_df = pd.merge(
            store_info_df, data[index1].reindex(columns=['addressIDnum', 'rid']),
            how='inner', on='addressIDnum', sort=False, copy=False)
    result = pd.concat(data)
    result['shelf_url'] = ('http://pc.ppznet.com/task_pc//shenhe/aicorrect/'
                           'images.jsp?responseid=' + result['rid'])
    result_gb = result.groupby('addressIDnum').sum()
    result_gb.reset_index(inplace=True)
    result_gb['category'] = 'total'
    result = pd.concat([result, result_gb], sort=False)
    result['month'].fillna(method='ffill', inplace=True)
    store_info_df.drop(columns=store_info_df.columns[15:], inplace=True)
    result = pd.merge(store_info_df, result, how='inner', on='addressIDnum',
                      sort=False, copy=False)
    engine = create_engine('mysql+pymysql://{}:{}@{}:{}/{}?charset={}'.format(
        mysql_db_bi_task['username'], mysql_db_bi_task['password'],
        mysql_db_bi_task['host'], mysql_db_bi_task['port'],
        mysql_db_bi_task['schema'], 'utf8'))
    con = engine.connect()
    result.to_sql(name='t_pg_report_hsm_visualization', con=con,
                  if_exists='replace', index=False)
    con.close()
    with MySQLInstance(**mysql_db_bi_task, dict_result=False) as db:
        count = int(db.query(sql_get_count)[0][0])
        date = datetime.now().strftime('%Y%m%d')
        db.execute(sql_delete_count % date)
        db.execute(sql_insert_count % (date, count))
def to_two_wave_result(i):
    logger.info('Run PID (%s)...' % os.getpid())
    new_df, check_number_new_df = to_one_wave_result(
        rid_df_list_new[i], sku_df_list_new[i], answer_df_list_new[i],
        sku_verification_df_list_new[i], multi_option_df_list_new[i],
        year_new[i], i)
    old_df, check_number_old_df = to_one_wave_result(
        rid_df_list_old[i], sku_df_list_old[i], answer_df_list_old[i],
        sku_verification_df_list_old[i], multi_option_df_list_old[i],
        year_old[i], i)
    new_df = pd.merge(new_df, store_new_df, how='left', on='addressIDnum')
    old_df = pd.merge(old_df, store_old_df, how='left', on='addressIDnum')
    new_df = pd.merge(new_df, old_df, how='left', on='addressIDnum',
                      suffixes=('', '_old'))
    new_df.drop_duplicates(subset='rid', inplace=True)
    new_df['check_total_shelf'] = new_df.apply(
        lambda x: check_vs_pp_total_shelf(x.total_shelf, x.total_shelf_old),
        axis=1)
    new_df['check_pg_shelf'] = new_df.apply(
        lambda x: check_vs_pp_pg_shelf(x.pg_shelf, x.pg_shelf_old), axis=1)
    new_df['check_pg_display'] = new_df.apply(
        lambda x: check_vs_pp_pg_display(x.total_display, x.pg_display,
                                         x.total_display_old,
                                         x.pg_display_old), axis=1)
    new_df['check_pg_sku'] = new_df.apply(
        lambda x: check_vs_pp_sku(i, x.actual_sku, x.actual_sku_old), axis=1)
    new_df['check_all'] = new_df.apply(
        lambda x: check_all(x.check_recent, x.shelf_display_verification_1,
                            x.shelf_display_verification_2,
                            x.sku_verification_count, x.check_total_shelf,
                            x.check_pg_shelf, x.check_pg_display,
                            x.check_pg_sku), axis=1)
    report_new_df = new_df.reindex(columns=report_order[i])
    report_old_df = old_df.reindex(columns=report_order[i])
    checkpoint_new_df = new_df.reindex(columns=checkpoint_order[i])
    checkpoint_old_df = old_df.reindex(columns=checkpoint_order[i])
    report_new_df.to_excel(new_file[i], category[i], columns=report_order[i],
                           index=False)
    report_old_df.to_excel(old_file[i], category[i], columns=report_order[i],
                           index=False)
    writer_checkpoint = pd.ExcelWriter(checkpoint_file[i])
    checkpoint_new_df.to_excel(writer_checkpoint, category[i] + '_new',
                               index=False)
    checkpoint_old_df.to_excel(writer_checkpoint, category[i] + '_old',
                               index=False)
    check_number_new_df.to_excel(writer_checkpoint,
                                 category[i] + '_number_new', index=False)
    check_number_old_df.to_excel(writer_checkpoint,
                                 category[i] + '_number_old', index=False)
    writer_checkpoint.close()
    subject = category[i] + datetime.now().strftime('%Y-%m-%d')
    # Email body: "the attachments contain the data for the two consecutive
    # months plus the records that need manual checking".
    contents = ['附件中为前后两月数据及需检查的数据', ]
    attachments = [new_file[i], old_file[i], checkpoint_file[i]]
    with EmailSender(**email) as email_sender:
        email_sender.send_email(to=to, subject=subject, contents=contents,
                                attachments=attachments)
    os.remove(new_file[i])
    os.remove(old_file[i])
    os.remove(checkpoint_file[i])
    with MySQLInstance(**mysql_db_bi_task, dict_result=True) as db:
        db.executemany(sql_insert_report_new,
                       [tuple(x) for x in
                        new_df.reindex(columns=insert_table_order_new).values])
        db.executemany(sql_insert_report_old,
                       [tuple(x) for x in
                        old_df.reindex(columns=insert_table_order_old).values])
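# EmailSender is another project-level wrapper that is not defined in this
# section. A minimal sketch of the interface used above -- a context manager
# whose send_email mirrors yagmail.SMTP.send -- is given here; the **email
# dict's keys (user, password, host) are assumptions, not confirmed by this
# file, and the real class may differ.
import yagmail


class EmailSender:
    def __init__(self, user, password, host, **kwargs):
        self._smtp = yagmail.SMTP(user=user, password=password, host=host)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._smtp.close()

    def send_email(self, to, subject, contents, attachments):
        self._smtp.send(to=to, subject=subject, contents=contents,
                        attachments=attachments)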
def main(index):
    logger.info('Run PID %s (%s)...' % (index, os.getpid()))
    with MySQLInstance(**mysql_db_ppzck_task, dict_result=False) as db:
        tuple_url_data = db.query(sql_get_url % (time_dict[index] * 6))
    for tup in tuple_url_data:
        # The target directory depends only on the row, not on the
        # individual media id, so build it once per row.
        file_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), 'media_download',
            string_normalize(tup[0]), string_normalize(tup[1]),
            string_normalize(tup[2]), string_normalize(tup[3]))
        if not os.path.exists(file_path):
            os.makedirs(file_path)
        for md in tup[5].split(';'):
            url = tup[4] + md
            response = requests.get(url)
            med = response.content
            with open(os.path.join(file_path, md.replace(':', '_')),
                      'wb') as f:
                f.write(med)
            time.sleep(1)
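# string_normalize is imported from a shared utility module. A plausible
# sketch is below -- the assumption being that it strips characters that are
# illegal in file and directory names, since its output is fed straight to
# os.path.join; the real helper may behave differently.
import re


def string_normalize(s):
    # Collapse path separators and other filename-hostile characters
    # into underscores so the value is safe as a path component.
    return re.sub(r'[\\/:*?"<>|\r\n\t ]+', '_', s).strip('_')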
def generate_sku_df(df1, df2, i):
    df = pd.merge(df1, store_info_df, how='left', on='addressIDnum',
                  sort=False, copy=False)
    df = pd.merge(df, sku_info_df[sku_info_df['category'] == category[i]],
                  how='left', on='mark', sort=False, copy=False)
    df = pd.merge(df, df2, how='left', on=['rid', 'product_id'], sort=False,
                  copy=False)
    df['answer_new'] = df.apply(
        lambda x: sku_normalization_answer(x.RD, x.status, x.hnhb), axis=1)
    df = df[df['answer_new'] != -1]
    df['fast_growing_sku_exist'] = df.apply(
        lambda x: fast_growing_sku_answer(x.answer_new, x.fast_growing),
        axis=1)
    sku_df = df.reindex(columns=['rid', 'mark', 'target_sku', 'product_name',
                                 'answer_new'])
    sku_df = sku_df.set_index(
        ['rid', 'mark', 'target_sku', 'product_name']).unstack()
    sku_df.columns = sku_df.columns.droplevel(0)
    sku_df.reset_index(level=[1, 2], inplace=True)
    # Select the aggregated columns with a list; indexing a groupby with a
    # bare tuple is an error in modern pandas.
    agg_df = df.groupby('rid')[['answer_new', 'fast_growing_sku_exist']].sum()
    agg_df.rename(columns={'answer_new': 'actual_sku',
                           'fast_growing_sku_exist': 'actual_fast_growing_sku'},
                  inplace=True)
    agg_df['base_sku'] = agg_df['actual_sku'] - agg_df['actual_fast_growing_sku']
    sku_df = sku_df.join(agg_df)
    if i == 5:
        agg1_df = df[df['denominator_option1'] != 0].groupby(
            by=['rid', 'denominator_option1']).agg(
            {'answer_new': np.sum, 'numerator_option1': np.max})
        agg1_df['actual_sku'] = agg1_df.apply(
            lambda x: min(x.answer_new, x.numerator_option1), axis=1)
        agg1_df = agg1_df['actual_sku'].unstack()
        agg1_df['actual_sku1'] = agg1_df.apply(np.sum, axis=1)
        agg2_df = df[(df['denominator_option2'] != 0)
                     & (df['mark'] != 'H')].groupby(
            by=['rid', 'denominator_option2']).agg(
            {'answer_new': np.sum, 'numerator_option2': np.max})
        agg2_df['actual_sku'] = agg2_df.apply(
            lambda x: min(x.answer_new, x.numerator_option2), axis=1)
        agg2_df = agg2_df['actual_sku'].unstack()
        agg2_df['actual_sku2'] = agg2_df.apply(np.sum, axis=1)
        agg3_df = pd.concat([agg1_df['actual_sku1'], agg2_df['actual_sku2']],
                            axis=1, sort=False)
        agg3_df['actual_sku3'] = np.fmax(agg3_df['actual_sku1'],
                                         agg3_df['actual_sku2'])
        sku_df = sku_df.join(agg3_df)
        sku_df['actual_sku'] = sku_df['actual_sku3']
        sku_df['target_sku3'] = sku_df.apply(lambda x: baby_p_target_sku(
            x.mark, x.actual_sku1, x.actual_sku2, x.target_sku), axis=1)
        sku_df['target_sku'] = sku_df['target_sku3']
        sku_df.drop(columns=['actual_sku1', 'actual_sku2', 'actual_sku3',
                             'target_sku3'], inplace=True)
    sku_df['fast_growing_sku_compliance'] = np.where(
        sku_df['actual_sku'] == 0, 0,
        round(sku_df['actual_fast_growing_sku'] / sku_df['actual_sku'], 4))
    del sku_df['mark']
    with MySQLInstance(**mysql_db_bi_task, dict_result=True) as db:
        db.executemany(sql_insert_sku, [
            tuple(x) for x in df[['rid', 'product_id', 'product_name',
                                  'answer_new', 'taskid']].values])
    return sku_df
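# For reference, a tiny self-contained illustration of the
# set_index(...).unstack() pivot used in generate_sku_df: long
# (rid, product_name, answer_new) rows become one wide row per rid with a
# column per product. The data below is made up purely for the demo.
import pandas as pd

long_df = pd.DataFrame({'rid': ['r1', 'r1', 'r2', 'r2'],
                        'product_name': ['A', 'B', 'A', 'B'],
                        'answer_new': [1, 0, 1, 1]})
wide_df = long_df.set_index(['rid', 'product_name']).unstack()
wide_df.columns = wide_df.columns.droplevel(0)  # drop the 'answer_new' level
print(wide_df)
# product_name  A  B
# rid
# r1            1  0
# r2            1  1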
if __name__ == '__main__':
    logger.info('Parent process %s.' % os.getpid())
    main()
    end_time = datetime.now()
    logger.info('time_consumed: %s' % (end_time - start_time))
new_file.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 ('pg_hsm_report_new_' + each_category
                  + datetime.now().strftime('%Y-%m-%d') + '.xlsx')))
old_file.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 ('pg_hsm_report_old_' + each_category
                  + datetime.now().strftime('%Y-%m-%d') + '.xlsx')))
checkpoint_file.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 ('pg_hsm_report_checkpoint_' + each_category
                  + datetime.now().strftime('%Y-%m-%d') + '.xlsx')))

sql_delete_report = """DELETE FROM t_pg_report_hsm WHERE taskid IN (%s)"""
sql_delete_sku = """DELETE FROM t_pg_report_hsm_sku_details WHERE taskid IN (%s)"""

with MySQLInstance(**mysql_db_bi_task, dict_result=True) as delete_db:
    for index1 in range(8):
        if index1 in (5, 7):
            continue
        delete_db.execute(sql_delete_report % task_new[index1])
        delete_db.execute(sql_delete_sku % task_new[index1])
        delete_db.execute(sql_delete_report % task_old[index1])
        delete_db.execute(sql_delete_sku % task_old[index1])


def query_data_frame(db_dict, sql, result=True):
    with MySQLInstance(**db_dict, dict_result=result) as db:
        query_result = db.query(sql)  # run the query once and reuse the rows
        if query_result:
            return DataFrame(query_result)
        logger.info('No result.')
        sys.exit()
ss_time = conf.get('account', 'time')
order = conf.get('account', 'order').split()


def query_data_frame(db_dict, sql, result=True):
    with MySQLInstance(**db_dict, dict_result=result) as db:
        query_result = db.query(sql)  # run the query once and reuse the rows
        if query_result:
            return DataFrame(query_result)
        logger.info('No result.')
        sys.exit()


sql_1 = """SELECT Id FROM t_enterpriseuser WHERE email = '%s'"""
with MySQLInstance(**mysql_db_ppzck_task, dict_result=False) as db1:
    a = db1.query(sql_1 % company_id)

sql_2 = """SELECT id FROM t_task WHERE owner_id = '%s' AND create_time > '%s'"""
with MySQLInstance(**mysql_db_ppzck_task, dict_result=False) as db1:
    b = db1.query(sql_2 % (a[0][0], ss_time))
c = r"','".join([i[0] for i in b])

sql_3 = """SELECT tui.*, tu.phone, tu.pid_qq, tu.email, tu.realname,
tu.zfbname, tu.nickname, tu.address
FROM t_userinfo tui
LEFT JOIN t_user tu ON tui.Id = tu.Id
WHERE tu.id IN (SELECT uid FROM t_response tr WHERE taskid_owner IN ('%s')
GROUP BY uid)"""
d = query_data_frame(mysql_db_ppzck_task, sql_3 % c)
d.set_index('Id', inplace=True)
def to_two_wave_result(i):
    logger.info('Run PID (%s)...' % os.getpid())
    sku_df_new = query_data_frame(
        mysql_db_ppzck_task,
        sql_get_sku % (task_new[i], time_selection_new, status_not_in_new))
    answer_df_new = query_data_frame(
        mysql_db_ppzck_task,
        sql_get_answer % (task_new[i], time_selection_new, status_not_in_new))
    sku_verification_df_new = query_data_frame(
        mysql_db_ppzck_task,
        sql_get_sku_verification % (task_new[i], time_selection_new,
                                    status_not_in_new))
    sku_df_old = query_data_frame(
        mysql_db_ppzck_task,
        sql_get_sku % (task_old[i], time_selection_old, status_not_in_old))
    answer_df_old = query_data_frame(
        mysql_db_ppzck_task,
        sql_get_answer % (task_old[i], time_selection_old, status_not_in_old))
    sku_verification_df_old = query_data_frame(
        mysql_db_ppzck_task,
        sql_get_sku_verification % (task_old[i], time_selection_old,
                                    status_not_in_old))
    sku_df_new.drop_duplicates(subset=['rid', 'product_id'], inplace=True)
    answer_df_new.drop_duplicates(subset=['rid', 'qid'], inplace=True)
    sku_df_old.drop_duplicates(subset=['rid', 'product_id'], inplace=True)
    answer_df_old.drop_duplicates(subset=['rid', 'qid'], inplace=True)
    if i in (0, 1, 2):
        # '联合陈列_' is the "joint display" question prefix.
        multi_option_df_new = query_data_frame(
            mysql_db_ppzck_task,
            sql_get_multi_option % ('联合陈列_', task_new[i]))
        multi_option_df_new.drop_duplicates(subset=['qid', 'option_index'],
                                            inplace=True)
        multi_option_df_old = query_data_frame(
            mysql_db_ppzck_task,
            sql_get_multi_option % ('联合陈列_', task_old[i]))
        multi_option_df_old.drop_duplicates(subset=['qid', 'option_index'],
                                            inplace=True)
    elif i == 5:
        # '样品展示_' is the "sample display" question prefix.
        multi_option_df_new = query_data_frame(
            mysql_db_ppzck_task,
            sql_get_multi_option % ('样品展示_', task_new[i]))
        multi_option_df_new.drop_duplicates(subset=['qid', 'option_index'],
                                            inplace=True)
        multi_option_df_old = query_data_frame(
            mysql_db_ppzck_task,
            sql_get_multi_option % ('样品展示_', task_old[i]))
        multi_option_df_old.drop_duplicates(subset=['qid', 'option_index'],
                                            inplace=True)
    else:
        multi_option_df_new = DataFrame()
        multi_option_df_old = DataFrame()
    new_df, check_number_new_df = to_one_wave_result(
        rid_df_list_new[i], sku_df_new, answer_df_new,
        sku_verification_df_new, multi_option_df_new, year_new[i], i)
    old_df, check_number_old_df = to_one_wave_result(
        rid_df_list_old[i], sku_df_old, answer_df_old,
        sku_verification_df_old, multi_option_df_old, year_old[i], i)
    new_df = pd.merge(new_df, store_new_df, how='left', on='addressIDnum')
    old_df = pd.merge(old_df, store_old_df, how='left', on='addressIDnum')
    new_df = pd.merge(new_df, old_df, how='left', on='addressIDnum',
                      suffixes=('', '_old'))
    new_df.drop_duplicates(subset='rid', inplace=True)
    new_df['check_total_shelf'] = new_df.apply(
        lambda x: check_vs_pp_total_shelf(x.total_shelf, x.total_shelf_old),
        axis=1)
    new_df['check_pg_shelf'] = new_df.apply(
        lambda x: check_vs_pp_pg_shelf(x.pg_shelf, x.pg_shelf_old), axis=1)
    new_df['check_pg_display'] = new_df.apply(
        lambda x: check_vs_pp_pg_display(x.total_display, x.pg_display,
                                         x.total_display_old,
                                         x.pg_display_old), axis=1)
    new_df['check_pg_sku'] = new_df.apply(
        lambda x: check_vs_pp_sku(i, x.actual_sku, x.actual_sku_old), axis=1)
    new_df['check_all'] = new_df.apply(
        lambda x: check_all(x.check_recent, x.shelf_display_verification_1,
                            x.shelf_display_verification_2,
                            x.sku_verification_count, x.check_total_shelf,
                            x.check_pg_shelf, x.check_pg_display,
                            x.check_pg_sku), axis=1)
    report_new_df = new_df.reindex(columns=report_order[i])
    report_old_df = old_df.reindex(columns=report_order[i])
    checkpoint_new_df = new_df.reindex(columns=checkpoint_order[i])
    checkpoint_old_df = old_df.reindex(columns=checkpoint_order[i])
    report_new_df.to_csv(new_file[i], index=False, encoding='utf_8_sig')
    report_old_df.to_csv(old_file[i], index=False, encoding='utf_8_sig')
    checkpoint_new_df.to_csv(new_checkpoint_file[i], index=False,
                             encoding='utf_8_sig')
    checkpoint_old_df.to_csv(old_checkpoint_file[i], index=False,
                             encoding='utf_8_sig')
    check_number_new_df.to_csv(new_checkpoint_number_file[i], index=False,
                               encoding='utf_8_sig')
    check_number_old_df.to_csv(old_checkpoint_number_file[i], index=False,
                               encoding='utf_8_sig')
    subject = category[i] + datetime.now().strftime('%Y-%m-%d')
    # Email body: "the attachments contain the data for the two consecutive
    # months plus the records that need manual checking".
    contents = ['附件中为前后两月数据及需检查的数据', ]
    attachments = [new_file[i], old_file[i], new_checkpoint_file[i],
                   old_checkpoint_file[i], new_checkpoint_number_file[i],
                   old_checkpoint_number_file[i]]
    with EmailSender(**email) as email_sender:
        email_sender.send_email(to=to, subject=subject, contents=contents,
                                attachments=attachments)
    os.remove(new_file[i])
    os.remove(old_file[i])
    os.remove(new_checkpoint_file[i])
    os.remove(old_checkpoint_file[i])
    os.remove(new_checkpoint_number_file[i])
    os.remove(old_checkpoint_number_file[i])
    with MySQLInstance(**mysql_db_bi_task, dict_result=True) as db:
        db.executemany(sql_insert_report_new,
                       [tuple(x) for x in
                        new_df.reindex(columns=insert_table_order_new).values])
        db.executemany(sql_insert_report_old,
                       [tuple(x) for x in
                        old_df.reindex(columns=insert_table_order_old).values])