def __init__(self, category=1, statistics_type='sum'):
    """Initialise the parent statistic and attach the data sources.

    ``category`` and ``statistics_type`` are passed through unchanged to the
    parent initialiser.  Afterwards the instance holds three database
    handles (bills, product, cash billing) and the path of its pickle file.
    """
    super(ConsumeMonthStudent, self).__init__(category, statistics_type)

    # Database handles, opened in the same order as before.
    self.bill_conn = get_bills_connection()
    self.legacy_conn = get_product_connection()
    self.cash_bill_conn = get_cash_billing_connection()

    # Pickle file associated with this statistic.
    self.filename = 'data/consumemonth_student.pkl'
def init_insert(self):
    """Prepare the instance for the "insert" pass over the users table.

    Sets ``self.conn`` to a product-database connection, ``self.sql`` to the
    query selecting the user columns to insert, and points
    ``self.get_cypherql`` at ``self.get_insert_cypherql``.
    """
    self.conn = get_product_connection()
    # Built from adjacent literals instead of backslash continuations inside
    # the string, so no stray backslash or indentation ends up in the SQL.
    self.sql = (
        "select user_id, nick_name, gender, birthday, place, vip, "
        "first_large_buy_at from users "
        "where user_id > 125144 and encrypt_mobile_v2 is not null "
        "and deleted_at is null "
        "and birthday > '1988-01-01'"
    )
    self.get_cypherql = self.get_insert_cypherql
def __init__(self, category=1):
    """Initialise a 'count' statistic and load the course lessons.

    ``category`` is forwarded to the parent initialiser together with the
    fixed statistics type ``'count'``.  The course, schedule and product
    connections are opened before ``get_course_lesson()`` is invoked.
    """
    super(LessonStudent, self).__init__(category, 'count')

    # Database handles, opened in the same order as before.
    self.conn = get_course_connection()
    self.schedule_conn = get_schedule_connection()
    self.product_conn = get_product_connection()

    # Pickle file associated with this statistic.
    self.filename = 'data/lesson_student.pkl'

    # Must run last: every attribute above is in place by now.
    self.get_course_lesson()
def get_increate_students(sid, check_database=False):
    """Return the ids of users whose ``user_id`` is greater than *sid*.

    The database lookup used to sit behind an unconditional ``return []``
    and was therefore unreachable.  That short-circuit is kept as the
    default so existing callers still get an empty list, but it is now an
    explicit switch instead of dead code.

    Args:
        sid: Highest user id already known; coerced with ``int()`` before
            being placed into the query.
        check_database: When true, query the product database; when false
            (default) report no new students without touching the database.

    Returns:
        A list of user ids (empty when the lookup is disabled).
    """
    if not check_database:
        return []

    conn = get_product_connection()
    print('check the increase student')
    with conn.cursor() as cur:
        # int() guarantees only a number is interpolated into the SQL text.
        sql = (
            "select user_id from users where user_id > %d "
            "and encrypt_mobile_v2 is not null and deleted_at is null"
        ) % int(sid)
        cur.execute(sql)
        return [row[0] for row in cur.fetchall()]
def __init__(self, category=3):
    """Open the billing connections and pick the product ids to analyse.

    ``category`` selects which product-id sets are used:

    * ``1`` -> ``self.format_product_id`` and cash products ``(2, 3, 4)``
    * ``2`` -> ``self.experience_product_id`` and cash products ``(1, 999)``
    * anything else -> ``self.all_product_id`` and cash products
      ``(1, 2, 3, 4)``
    """
    super(BillDataFrame, self).__init__()

    self.bill_conn = get_bills_connection()
    self.cash_bill_conn = get_cash_billing_connection()
    self.product_conn = get_product_connection()
    self.category = category

    # Only the attribute belonging to the chosen branch is read.
    if category == 1:
        chosen = (self.format_product_id, (2, 3, 4))
    elif category == 2:
        chosen = (self.experience_product_id, (1, 999))
    else:
        chosen = (self.all_product_id, (1, 2, 3, 4))
    self.product_ids, self.cash_product_ids = chosen
def get_student_series(refresh=False):
    """Return the student ids as a ``uint32`` pandas Series.

    The pickle at ``data/students.pkl`` is returned when it exists and
    *refresh* is false.  Otherwise the ids are read from the product
    database, written to that pickle, and returned.
    """
    filename = 'data/students.pkl'

    # Fast path: reuse the cached pickle.
    if not refresh and os.path.isfile(filename):
        print('read student from filename: %s' % filename)
        return pd.read_pickle(filename)

    print('read students from database')
    conn = get_product_connection()
    with conn.cursor() as cur:
        sql = "select user_id from users where encrypt_mobile_v2 is not null and deleted_at is null"
        cur.execute(sql)
        user_ids = [row[0] for row in cur.fetchall()]

    series = pd.Series(user_ids, dtype='uint32')
    print('save student to file: %s' % filename)
    pd.to_pickle(series, filename)
    return series
def get_vip_student_series(refresh=True):
    """Return the ids of VIP students as a ``uint32`` pandas Series.

    A VIP student here is a user row with ``first_large_buy_at``,
    ``encrypt_mobile_v2`` set and ``deleted_at`` unset.  When the pickle
    ``data/vip_students.pkl`` exists it is returned as-is; otherwise the ids
    are queried from the product database, pickled to that path and
    returned.

    Args:
        refresh: Accepted for interface compatibility but currently ignored
            (see the review note in the body).
    """
    filename = 'data/vip_students.pkl'

    # NOTE(review): `refresh` is never consulted, so an existing pickle always
    # wins.  Honouring it with the current default of True would make every
    # default call hit the database, so the behaviour is left untouched here;
    # decide on the intended default before wiring it in.
    if os.path.isfile(filename):
        print('get vip students from file: %s' % filename)
        return pd.read_pickle(filename)

    conn = get_product_connection()
    with conn.cursor() as cur:
        # Adjacent literals replace the backslash continuations that used to
        # sit inside the string and leaked into the SQL text.
        sql = (
            "select user_id from users where first_large_buy_at is "
            "not null and encrypt_mobile_v2 is not null and deleted_at is "
            "null"
        )
        cur.execute(sql)
        data = [row[0] for row in cur.fetchall()]

    s = pd.Series(data, dtype='uint32')
    print('save vip student to file: %s' % filename)
    pd.to_pickle(s, filename)
    return s
def __init__(self):
    """Open the data sources and create the zero-filled month tables.

    ``user_df`` and ``consumer_df`` both have one row per entry of
    ``months.index`` and the columns ``'increase_student'`` followed by the
    same month entries.  ``nil_students()`` is called once everything is set.
    """
    months = MonthIndexFactroy('2018-05')

    def empty_month_frame():
        # One row per month; columns are 'increase_student' plus each month.
        return pd.DataFrame(
            0,
            index=months.index,
            columns=['increase_student'] + months.index,
        )

    self.bill_conn = get_bills_connection()
    self.cash_bill_conn = get_cash_billing_connection()
    self.legacy_conn = get_product_connection()

    self.students = {}
    self.all_students = set()
    self.months = months

    self.user_df = empty_month_frame()
    self.consumer_df = empty_month_frame()

    self.nil_students()
def get_student_list(refresh=False):
    """Return the list of student ids, cached in ``data/student_list``.

    The cache is a text file with one id per line.  When it exists (and
    *refresh* is false) its ids are returned, extended by whatever
    ``get_increate_students`` reports for the last cached id; the file is
    rewritten when new ids were found.  Otherwise every id is read from the
    product database and the cache file is (re)written.

    Args:
        refresh: When true, ignore the cache file and rebuild it from the
            database.  The flag used to be accepted but never consulted.

    Returns:
        A list of integer user ids.
    """
    filename = 'data/student_list'

    if not refresh and os.path.isfile(filename):
        with open(filename, "r") as fo:
            # Skip blank lines so a trailing newline cannot break int().
            student_list = [int(line.strip()) for line in fo if line.strip()]

        # An empty cache has no "last id" to continue from (indexing [-1]
        # used to raise IndexError); fall through to a full reload instead.
        if student_list:
            inc_students = get_increate_students(student_list[-1])
            student_list = student_list + inc_students
            if inc_students:
                print('write to file: %s' % filename)
                with open(filename, 'w') as fo:
                    fo.writelines(str(i) + '\n' for i in student_list)
            return student_list

    conn = get_product_connection()
    with conn.cursor() as cur:
        sql = "select user_id from users where encrypt_mobile_v2 is not null and deleted_at is null"
        cur.execute(sql)
        data = [row[0] for row in cur.fetchall()]

    print('write to file: %s' % filename)
    with open(filename, 'w') as fo:
        fo.writelines(str(i) + '\n' for i in data)
    return data
def _mission_7_first_bills(conn, sql, seen_students):
    """Map the first bill met for each unseen student to its paid month.

    *sql* must yield ``(bill_id, student_id, paid_at)`` rows.  A student
    already in *seen_students* is skipped; otherwise the student is added to
    the set and the bill is recorded.

    Returns a dict ``{bill_id: 'YYYY-MM'}``.
    """
    bill_month = {}
    with conn.cursor() as cur:
        cur.execute(sql)
        print(cur.rowcount)
        for bill_id, student_id, paid_at in cur.fetchall():
            if student_id in seen_students:
                continue
            seen_students.add(student_id)
            bill_month[bill_id] = paid_at.strftime('%Y-%m')
    return bill_month


def _mission_7_count_refunds(conn, sql, bill_month, df):
    """Add 1 to ``df[paid month, refund month]`` per refund of a tracked bill.

    *sql* must yield ``(bill_id, updated_at)`` rows; rows whose bill id is
    not a key of *bill_month* are ignored.
    """
    with conn.cursor() as cur:
        cur.execute(sql)
        print(cur.rowcount)
        for bill_id, refunded_at in cur.fetchall():
            # Direct lookup instead of scanning every month's bill list.
            paid_month = bill_month.get(bill_id)
            if paid_month is None:
                continue
            df.loc[paid_month, refunded_at.strftime('%Y-%m')] += 1


def mission_7(refresh):
    """Write the paid-month x refund-month table of refunded first bills.

    Rows are the month a bill was paid, columns the month its refund row was
    last updated; a leading ``count`` column holds the number of tracked
    bills paid in that month (bills database plus cash-billing database).
    The table is printed and saved to ``data/first_format_bills_refund.csv``.

    Args:
        refresh: Unused; kept so existing callers keep working.

    Raises:
        KeyError: If a tracked bill or its refund falls in a month outside
            the 2015-12 .. 2018-06 window.
    """
    # Consecutive months.  A stray second '2017-03' used to sit between
    # '2016-03' and '2016-04', producing duplicate row/column labels.
    month_list = [
        '2015-12',
        '2016-01', '2016-02', '2016-03', '2016-04', '2016-05', '2016-06',
        '2016-07', '2016-08', '2016-09', '2016-10', '2016-11', '2016-12',
        '2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
        '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
        '2018-01', '2018-02', '2018-03', '2018-04', '2018-05', '2018-06',
    ]
    # uint32 rather than uint8: a cell silently wrapped around past 255.
    df = pd.DataFrame(0, index=month_list, columns=month_list, dtype='uint32')

    # Shared across both bill sources, so a student counted from the bills
    # database is not counted again from the cash-billing database.
    # NOTE(review): the queries have no ORDER BY, so "first" means first row
    # returned, not necessarily earliest paid_at -- confirm this is intended.
    student_set = set()

    refunds_sql = (
        "select bill_id, updated_at from refunds "
        "where status = 3 and type = 0 and deleted_at is null"
    )

    conn = get_bills_connection()
    bills = _mission_7_first_bills(
        conn,
        "select id, student_id, paid_at from bills where product_id "
        "in (5,6,13,19) and status in (20, 70, 80) and student_id > 0 "
        "and deleted_at is null",
        student_set,
    )
    _mission_7_count_refunds(conn, refunds_sql, bills, df)

    # Refund rows kept in the product database are matched against the same
    # bills-database ids.
    conn = get_product_connection()
    _mission_7_count_refunds(
        conn,
        "select bill_id, updated_at from refund where deleted_at is null and status_id = 20 ",
        bills,
        df,
    )

    conn = get_cash_billing_connection()
    cash_bills = _mission_7_first_bills(
        conn,
        "select id, student_id, paid_at from bills where product_id in (2,3,4) and "
        "status in (20, 70, 80) and deleted_at is null",
        student_set,
    )
    _mission_7_count_refunds(conn, refunds_sql, cash_bills, df)

    print(df)

    # Number of tracked bills paid per month, over both sources.
    paid_per_month = {}
    for source in (bills, cash_bills):
        for month in source.values():
            paid_per_month[month] = paid_per_month.get(month, 0) + 1

    df.insert(0, 'count', [paid_per_month.get(m, 0) for m in month_list])
    df.to_csv('data/first_format_bills_refund.csv')
def get_users(self):
    """Load VIP users into ``self._df`` with a zeroed ``count`` column.

    Selects ``user_id`` and ``first_large_buy_at`` for non-deleted users
    with ``vip = 1`` whose ``first_large_buy_at`` is after 2016-01-01.  The
    frame is indexed by ``user_id``.
    """
    product_conn = get_product_connection()
    query = (
        "select user_id, first_large_buy_at from users "
        "where vip = 1 and first_large_buy_at > '2016-01-01' "
        "and deleted_at is null"
    )
    users = pd.read_sql(query, product_conn, index_col='user_id')
    users['count'] = 0
    self._df = users
def __init__(self):
    """Run the parent initialiser, then open the two database handles.

    ``self.conn`` is the schedule connection and ``self.product_conn`` the
    product connection.
    """
    super(ScheduleDataFrame, self).__init__()
    # Evaluated left to right: schedule first, then product.
    self.conn, self.product_conn = (
        get_schedule_connection(),
        get_product_connection(),
    )