def generatedata(lag_day=7): exec_start_time=time.time() today_string = dt.date.today().strftime("%Y%m%d") yesterday_string = (dt.date.today() - dt.timedelta(1)).strftime("%Y%m%d") n_date_string = (dt.date.today() - dt.timedelta(lag_day)).strftime("%Y%m%d") from jaydebeapi import _DEFAULT_CONVERTERS, _java_to_py import jaydebeapi _DEFAULT_CONVERTERS.update({'BIGINT':_java_to_py('longValue')}) conn = jaydebeapi.connect('com.ingres.jdbc.IngresDriver', ['jdbc:ingres://192.168.6.199:vw7/compass_v1', 'mintel', 'mintel'], "lib/iijdbc.jar") curs = conn.cursor() curs.execute(''' select TOP 10000 a.egoodsid, a.c34 as c34a, b.c34 as c34b, a.goodsid, a.currentprice, a.allcomments, a.c34, a.c1, a.item_rank, a.agg1, a.agg2, a.agg3, a.agg5, a.agg7, a.is_promo from mv_simtxn a, mv_simtxn b where a.etype='JD' AND a.ref_date='%s' AND b.etype='JD' AND b.ref_date='%s' AND a.egoodsid = b.egoodsid ''' % (yesterday_string,n_date_string)) rows = curs.fetchall() print "query read time: %.1f seconds" % (time.time() - exec_start_time) columnNames = [curs.description[i][0] for i in range(len(curs.description))] df_test = pd.DataFrame(data=rows, columns=columnNames) df_test.index = df_test['egoodsid'].values df_test['relative_item_rank']= divide(df_test['item_rank'],df_test['agg1']) df_test['brand_share'] = divide(df_test['agg5'],df_test['agg2']) df_test['log1p(RIR)'] = log(divide(df_test['item_rank'],df_test['agg1'])) #log(df_test['relative_item_rank']) df_test['log(c34)'] = log(df_test['c34b']) df_test['log(allcomments)'] = log(df_test['allcomments']) df_test['is_promo'] = pd.to_numeric(df_test['is_promo']) df_test['c1'] = pd.to_numeric(df_test['c1']) #df_test['AC_updated'] = df_test['agg7']>0 df_test["case_label"] = 0 df_test.loc[df_test.item_rank.notnull(),"case_label"]=1 #df_test.loc[df_test.item_rank.notnull() & df_test.c34.isnull(),"case_label"]=2 df_test.loc[df_test.item_rank.isnull() & df_test.c34.notnull(),"case_label"]=3 df_test.loc[df_test.item_rank.isnull() & df_test.c34.isnull(),"case_label"]=4 return df_test
#%pylab inline # ####Design: # # 1. Add the following features {relative_item_rank: item_rank/agg1, brandshare: agg5/agg2} # 2. Filter out all non-zero c30s, run them through: # - p(c30==0|D1):D1=allcomments, is_in_stock, item_rank, agg7, agg8, # - p(q30==0|D2):D2=p0c30, allcomments, is_in_stock, relative_item_rank, is_promo # 3. Generate a case number for each row, based on rules in documentation # # # In[3]: from jaydebeapi import _DEFAULT_CONVERTERS, _java_to_py _DEFAULT_CONVERTERS.update({'BIGINT': _java_to_py('longValue')}) conn = jaydebeapi.connect( 'com.ingres.jdbc.IngresDriver', ['jdbc:ingres://192.168.6.199:vw7/compass_v1', 'mintel', 'mintel'], "lib/iijdbc.jar") curs = conn.cursor() curs.execute(''' SELECT a.egoodsid, a.goodsid, a.currentprice, a.allcomments, a.c34, a.c1, a.item_rank,
import datetime import jaydebeapi from jaydebeapi import _DEFAULT_CONVERTERS, _java_to_py import sqlite3 from config import * _DEFAULT_CONVERTERS.update({'BIGINT': _java_to_py('longValue')}) class openfireMySQL: def __init__(self): self.mysqlOFCon = jaydebeapi.connect('com.mysql.jdbc.Driver', ['jdbc:mysql://' + destDBHost + ':' + destDBPort + '/' + destDBname, destUser, destPw], ['hsqldb.jar', 'mysql.jar'], ) self.mysqlOFCur = self.mysqlOFCon.cursor() def getMySQLTableNames(self): self.mysqlCon = jaydebeapi.connect('com.mysql.jdbc.Driver', ['jdbc:mysql://' + destDBHost + ':' + destDBPort, destUser, destPw], ['hsqldb.jar', 'mysql.jar'],) self.mysqlCur = self.mysqlCon.cursor() self.mysqlCur.execute("SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA = '%s'" % destDBname) self.tables = self.mysqlCur.fetchall() self.mysqlCon.close() self.tableList = [] for table in self.tables: self.tableList.append(str(table[0])) return self.tableList def addToMySQL(self, table, columns, data): data = str(data)
def lag(lag_day=7, until_day='yesterday'): ### you can put until_day as m days ago, where m must <=n ### one greater than 1 integer number is expected of until_day. ### example: m=3, means you generate df is data of 3 days ago, df_lag is data of "lag_day" days ago. import datetime as dt import time import pandas as pd exec_start_time = time.time() yesterday_string = (dt.date.today() - dt.timedelta(1)).strftime("%Y%m%d") today_string = dt.date.today().strftime("%Y%m%d") n_date_string = (dt.date.today() - dt.timedelta(lag_day)).strftime("%Y%m%d") if until_day == 'yesterday': until_day = 1 if until_day > lag_day: raise IOError("until_day: %d must be no greater than lag_day: %d!" % (until_day, lag_day)) if until_day < 1: raise IOError("until_day: %d and lag_day: %d must be no less than 1!" % (until_day, lag_day)) if type(until_day) != int or type(lag_day) != int: raise IOError("until_day and lag_day must be integer number!") yesterday_string = (dt.date.today() - dt.timedelta(until_day)).strftime("%Y%m%d") from jaydebeapi import _DEFAULT_CONVERTERS, _java_to_py import jaydebeapi _DEFAULT_CONVERTERS.update({'BIGINT': _java_to_py('longValue')}) conn = jaydebeapi.connect( 'com.ingres.jdbc.IngresDriver', ['jdbc:ingres://192.168.6.199:vw7/compass_v1', 'mintel', 'Passwd'], "lib/iijdbc.jar") curs = conn.cursor() curs.execute(''' SELECT a.egoodsid, a.goodsid, a.currentprice, a.allcomments, a.c30, a.c1, a.item_rank, a.agg1, a.agg2, a.agg3, a.agg5, a.agg7, a.is_promo FROM mv_simtxn a WHERE a.etype='JD' AND a.ref_date='%s' ''' % (yesterday_string)) rows = curs.fetchall() columnNames = [ curs.description[i][0] for i in range(len(curs.description)) ] df = pd.DataFrame(data=rows, columns=columnNames) curs = conn.cursor() curs.execute(''' select a.egoodsid, b.c30, a.goodsid, a.currentprice, a.allcomments, a.c1, a.item_rank, a.agg1, a.agg2, a.agg3, a.agg5, a.agg7, a.is_promo from mv_simtxn a, mv_simtxn b where a.etype='JD' AND a.ref_date='%s' AND b.etype='JD' AND b.ref_date='%s' AND a.egoodsid = b.egoodsid ''' % (yesterday_string, n_date_string)) rows = curs.fetchall() print "query read time: %.1f seconds" % (time.time() - exec_start_time) columnNames = [ curs.description[i][0] for i in range(len(curs.description)) ] df_lag = pd.DataFrame(data=rows, columns=columnNames) return df, df_lag
def dbapi(cls): import jaydebeapi from jaydebeapi import _DEFAULT_CONVERTERS, _java_to_py _DEFAULT_CONVERTERS.update({'BIGINT': _java_to_py('longValue')}) return jaydebeapi