def saveNewestMonthData(year=2017, month=1, maxPage=5, pageSize=60):
    """Scrape one month's bid summary and bid history and insert them into MySQL.

    The summary is fetched with ``scrapySummaryDataOfMonth`` and the history
    rows with ``scrapyDataOfMonth`` from the bidwiz endpoints; one row is then
    inserted into ``t_bid_summary`` and one row per history item into
    ``t_bid_data``.

    All values are sent as bound query parameters, so scraped text cannot
    break or alter the SQL. A ``None`` value is therefore stored as SQL NULL.
    Both inserts are committed together; if anything fails nothing is
    committed and the connection is still closed.

    Args:
        year: Year passed through to the scrapers.
        month: Month passed through to the scrapers.
        maxPage: Page limit passed through to the scrapers.
        pageSize: Page size passed through to the scrapers.

    Raises:
        KeyError: If a scraped record lacks one of the inserted columns.
    """
    summary_columns = (
        "bid_month", "bid_date", "alert_price", "avg_price", "bid_people_num",
        "bid_percent", "license_num", "lowest_price", "lowest_price_time",
    )
    data_columns = (
        "system_time", "lowest_price_time", "bid_month",
        "lowest_price_to", "lowest_price", "lowest_price_from",
    )
    summary_sql = (
        "insert into t_bid_summary (bid_month,bid_date,alert_price,avg_price,"
        "bid_people_num,bid_percent,license_num,lowest_price,lowest_price_time) "
        "values (%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    )
    data_sql = (
        "insert into t_bid_data (system_time,lowest_price_time,bid_month,"
        "lowest_price_to,lowest_price,lowest_price_from) "
        "values (%s,%s,%s,%s,%s,%s)"
    )

    bidSummaryData = scrapySummaryDataOfMonth(
        "http://bidwiz.duapp.com/bwCheckController.do?getBidSummaryData",
        year, month, maxPage, pageSize)
    bidData = scrapyDataOfMonth(
        "http://bidwiz.duapp.com/bwCheckController.do?getBidHistoricalData",
        year, month, maxPage, pageSize)

    auth = confidentials.getMySqlAuth()
    conn = pymysql.connect(host=auth[0], user=auth[1], passwd=auth[2], db=auth[3])
    try:
        conn.set_charset("utf8")
        cur = conn.cursor()
        try:
            cur.execute("set names utf8;")
            cur.execute("set character set utf8;")
            cur.execute("set character_set_connection=utf8;")

            print(bidSummaryData)
            summary_params = tuple(bidSummaryData[c] for c in summary_columns)
            # mogrify renders the statement exactly as execute() will send it.
            print(cur.mogrify(summary_sql, summary_params))
            cur.execute(summary_sql, summary_params)

            if bidData is not None:
                for d in bidData:
                    params = tuple(d[c] for c in data_columns)
                    print(cur.mogrify(data_sql, params))
                    cur.execute(data_sql, params)

            # pymysql connections do not autocommit by default; without this
            # the inserts are rolled back on close for transactional tables.
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()
def __init__(self):
    """Open a MySQL connection and a cursor using the configured credentials.

    NOTE(review): the connection and cursor are bound to local names only and
    are not stored on ``self``, so they are unreachable once this method
    returns. Confirm whether they were meant to be instance attributes.
    """
    auth = confidentials.getMySqlAuth()
    host, user, password, database = auth[0], auth[1], auth[2], auth[3]
    connection = pymysql.connect(host=host, user=user, passwd=password, db=database)
    cursor = connection.cursor()
def getJsonFromSummaryData():
    """Dump selected columns of ``t_bid_summary`` to ``../data/summary.json``.

    Reads every row of ``t_bid_summary`` ordered by ``bid_month`` ascending
    and writes a JSON array of objects with the keys ``bid_month``,
    ``alert_price``, ``lowest_price``, ``avg_price``, ``bid_people_num`` and
    ``license_num``. Values are taken by column position from ``select *``
    (positions 0, 2, 7, 3, 4 and 6 respectively).

    The cursor, connection and output file are released even when a query or
    the serialisation fails.
    """
    auth = confidentials.getMySqlAuth()
    conn = pymysql.connect(host=auth[0], user=auth[1], passwd=auth[2], db=auth[3])
    try:
        cur = conn.cursor()
        try:
            cur.execute("use " + auth[3])
            cur.execute("select * from t_bid_summary order by bid_month asc")
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    result = []
    for r in rows:
        print(r)
        res = {
            "bid_month": r[0],
            "alert_price": r[2],
            "lowest_price": r[7],
            "avg_price": r[3],
            "bid_people_num": r[4],
            "license_num": r[6],
        }
        print(res)
        result.append(res)

    with open("../data/summary.json", "w") as file_object:
        file_object.write(json.dumps(result))
def calculateFinalMarginPrice(year=2014):
    """Fill ``final_margin_price`` in ``t_bid_data`` for each month of ``year``.

    For every month that has a row at ``system_time`` ``11:29:59``, that row's
    ``lowest_price`` is taken as the final price. For each second from
    ``11:29:30`` to ``11:29:58`` that has a row, ``final_margin_price`` is set
    to ``final price - lowest_price at that second``. Months without an
    ``11:29:59`` row are skipped.

    Queries use bound parameters, the updates are committed once at the end,
    and the cursor and connection are closed even on failure.

    Args:
        year: Year whose twelve ``bid_month`` values (``YYYYMM``) are processed.
    """
    select_sql = ("select lowest_price from t_bid_data "
                  "where bid_month=%s and system_time=%s")
    update_sql = ("update t_bid_data set final_margin_price=%s "
                  "where bid_month=%s and system_time=%s")

    auth = confidentials.getMySqlAuth()
    conn = pymysql.connect(host=auth[0], user=auth[1], passwd=auth[2], db=auth[3])
    try:
        cur = conn.cursor()
        try:
            for month in range(1, 13):
                bid_month = "{0}{1:02d}".format(year, month)
                cur.execute(select_sql, (bid_month, "11:29:59"))
                final_row = cur.fetchone()
                if final_row is None:
                    continue
                final_lowest_price = float(final_row[0])

                # NOTE(review): second 59 itself is not updated (range stops
                # at 58), so its final_margin_price is left untouched.
                for t in range(30, 59):
                    system_time = "11:29:" + str(t)
                    select_params = (bid_month, system_time)
                    print(cur.mogrify(select_sql, select_params))
                    cur.execute(select_sql, select_params)
                    price_row = cur.fetchone()
                    if price_row is None:
                        continue
                    margin_price = final_lowest_price - float(price_row[0])
                    update_params = (margin_price, bid_month, system_time)
                    print(cur.mogrify(update_sql, update_params))
                    cur.execute(update_sql, update_params)

            # pymysql does not autocommit by default; persist the updates.
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()
def getBidDataFromDB(yearStart=2016, yearEnd=2016):
    """Export per-second lowest prices to ``../data/bid_<year>.txt``.

    For each year in ``[yearStart, yearEnd]`` and each month that has any row
    in ``t_bid_data``, one tab-separated line
    ``bid_month<TAB>system_time<TAB>lowest_price`` is written for every second
    from ``11:29:30`` to ``11:29:59``. A second with no row is written with a
    price of ``0``. One file is written per year, including years with no
    data (the file is then empty).

    The cursor and connection are now always closed; the original never
    closed them.

    Args:
        yearStart: First year to export (inclusive).
        yearEnd: Last year to export (inclusive).
    """
    count_sql = "select count(0) from t_bid_data where bid_month=%s"
    price_sql = ("select lowest_price from t_bid_data "
                 "where bid_month=%s and system_time=%s")

    auth = confidentials.getMySqlAuth()
    conn = pymysql.connect(host=auth[0], user=auth[1], passwd=auth[2], db=auth[3])
    try:
        cur = conn.cursor()
        try:
            cur.execute('use ' + auth[3])
            for year in range(yearStart, yearEnd + 1):
                lines = []
                for month in range(1, 13):
                    bid_month = "{0}{1:02d}".format(year, month)
                    cur.execute(count_sql, (bid_month,))
                    if cur.fetchone()[0] == 0:
                        print('no data from ' + bid_month)
                        continue
                    for second in range(30, 60):
                        system_time = '11:29:' + str(second)
                        cur.execute(price_sql, (bid_month, system_time))
                        row = cur.fetchone()
                        lowest_price = 0
                        if row is not None:
                            lowest_price = row[0]
                        else:
                            print(bid_month + ' ' + system_time + ' no data ')
                        print(bid_month + ' ' + system_time + ' ' + str(lowest_price))
                        lines.append(bid_month + '\t' + system_time + '\t'
                                     + str(lowest_price) + '\n')
                with open('../data/bid_' + str(year) + '.txt', 'w') as file_bid_data:
                    file_bid_data.write(''.join(lines))
        finally:
            cur.close()
    finally:
        conn.close()
def saveAllBidDataToCSV(fileName, startTime=None, endTime=None):
    """Export the summary/data join to a CSV file under ``../data/``.

    Joins ``t_bid_summary`` (left) with ``t_bid_data`` on ``bid_month``,
    optionally restricted to ``startTime <= system_time <= endTime``, ordered
    by ``bid_month`` descending then ``system_time`` ascending. Rows whose
    ``t_bid_data`` id is NULL (no matching data row) are printed but not
    written.

    The last two selected columns both carry the summary's
    ``lowest_price_time``; in the output they are replaced by its first 8
    characters (``lowest_price_time``) and by characters ``[10:-1]``
    (``lowest_price_time_order``).

    The time bounds are bound as query parameters, not concatenated into the
    SQL, and the file, cursor and connection are closed even on failure.

    Args:
        fileName: Name of the CSV file created inside ``../data/``.
        startTime: Optional inclusive lower bound for ``system_time``.
        endTime: Optional inclusive upper bound for ``system_time``.
    """
    sql = ('select b.id,b.bid_month,b.system_time,'
           'b.lowest_price_time as real_lowest_price_time,'
           'b.lowest_price as real_lowest_price,'
           'b.lowest_price_from as real_lowest_price_from,'
           'b.lowest_price_to as real_lowest_price_to,'
           'a.bid_date,a.alert_price,a.avg_price,a.bid_people_num,a.bid_percent,'
           'a.license_num,a.lowest_price,a.lowest_price_time,'
           'a.lowest_price_time as tmp '
           'from t_bid_summary as a left join t_bid_data as b '
           'on a.bid_month=b.bid_month where 1=1 ')
    params = []
    if startTime is not None:
        sql += ' and system_time>=%s'
        params.append(startTime)
    if endTime is not None:
        sql += ' and system_time<=%s'
        params.append(endTime)
    sql += ' ORDER BY a.bid_month desc,b.system_time ASC'

    header = ("id", "bid_month", "system_time", "real_lowest_price_time",
              "real_lowest_price", "real_lowest_price_from",
              "real_lowest_price_to", "bid_date", "alert_price", "avg_price",
              "bid_people_num", "bid_percent", "license_num", "lowest_price",
              "lowest_price_time", "lowest_price_time_order")

    auth = confidentials.getMySqlAuth()
    conn = pymysql.connect(host=auth[0], user=auth[1], passwd=auth[2], db=auth[3])
    try:
        conn.set_charset("utf8")
        cur = conn.cursor()
        try:
            cur.execute("set names utf8;")
            cur.execute("set character set utf8;")
            cur.execute("set character_set_connection=utf8;")
            # Pass None when there are no bounds so no %-interpolation happens.
            cur.execute(sql, params or None)

            with open("../data/" + fileName, "w+", newline='') as csvFile:
                writer = csv.writer(csvFile)
                writer.writerow(header)
                for row in cur.fetchall():
                    print(row)
                    if row[0] is None:
                        continue
                    row = list(row)
                    lowest_price_time = str(row[-1])
                    row[-2] = lowest_price_time[:8]
                    row[-1] = lowest_price_time[10:-1]
                    writer.writerow(row)
        finally:
            cur.close()
    finally:
        conn.close()
def saveBidDataToDB(bidData):
    """Insert each record of ``bidData`` as a row of ``t_bid_data``.

    Values are bound as query parameters instead of being ``%``-formatted
    into the statement, so text fields cannot break or alter the SQL. Values
    are passed through as given: the original ``%d`` formatting truncated
    floats and raised on ``None``, whereas ``None`` is now stored as NULL.
    All rows are committed together at the end; on failure nothing is
    committed and the connection is still closed.

    Args:
        bidData: Iterable of mappings with the keys ``bid_people_num``,
            ``system_time``, ``lowest_price_time``, ``bid_month``,
            ``lowest_price_to``, ``lowest_price`` and ``lowest_price_from``.

    Raises:
        KeyError: If a record lacks one of the keys above.
    """
    columns = ("bid_people_num", "system_time", "lowest_price_time",
               "bid_month", "lowest_price_to", "lowest_price",
               "lowest_price_from")
    sql = ("insert into t_bid_data (bid_people_num,system_time,"
           "lowest_price_time,bid_month,lowest_price_to,lowest_price,"
           "lowest_price_from) values (%s,%s,%s,%s,%s,%s,%s)")

    auth = confidentials.getMySqlAuth()
    conn = pymysql.connect(host=auth[0], user=auth[1], passwd=auth[2], db=auth[3])
    try:
        cur = conn.cursor()
        try:
            cur.execute("use " + auth[3])
            for d in bidData:
                params = tuple(d[c] for c in columns)
                # mogrify renders the statement exactly as execute() sends it.
                print(cur.mogrify(sql, params))
                cur.execute(sql, params)
            # pymysql does not autocommit by default; persist the inserts.
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()
def saveStandRegressValue(values):
    """Store ``stand_regress_value`` on ``t_bid_data`` rows selected by id.

    For every item, element ``[0]`` is converted to ``int`` and used as the
    row id, and element ``[2]`` is written to ``stand_regress_value``. Values
    are bound as query parameters, all updates are committed together, and
    the cursor and connection are closed even on failure.

    Args:
        values: Iterable of indexable items where ``item[0]`` is the row id
            and ``item[2]`` is the value to store.
    """
    sql = 'update t_bid_data set stand_regress_value=%s where id=%s'

    auth = confidentials.getMySqlAuth()
    conn = pymysql.connect(host=auth[0], user=auth[1], passwd=auth[2], db=auth[3])
    try:
        cur = conn.cursor()
        try:
            for value in values:
                params = (value[2], int(value[0]))
                print(cur.mogrify(sql, params))
                cur.execute(sql, params)
            # pymysql does not autocommit by default; persist the updates.
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()
def save30260UsefulBidDataToCSV(fileName="30_60_useful_bid_data.csv", startTime="11:29:30", endTime="11:30:00"):
    """Export a compact summary/data join to a CSV file under ``../data/``.

    Joins ``t_bid_summary`` (left) with ``t_bid_data`` on ``bid_month``,
    optionally restricted to ``startTime <= system_time <= endTime``. Rows
    whose ``t_bid_data`` id is NULL are printed but not written. Each written
    row is reshaped as follows:

    * ``bid_month`` ``201612`` becomes ``16.12``.
    * ``system_time`` ``11:29:30`` becomes ``9.30``; any value ending in
      ``00`` becomes ``9.60``.
    * ``real_lowest_price_time`` keeps its last 4 characters with ``:``
      replaced by ``.``.
    * The last two columns (both the summary ``lowest_price_time``) become
      the time part in the same ``9.30`` form and the characters ``[10:-1]``.
    * A final ``result`` column is ``1`` when ``real_lowest_price`` equals the
      summary ``lowest_price``, otherwise ``0``.

    The time bounds are bound as query parameters, and the file, cursor and
    connection are closed even on failure.

    Args:
        fileName: Name of the CSV file created inside ``../data/``.
        startTime: Optional inclusive lower bound for ``system_time``.
        endTime: Optional inclusive upper bound for ``system_time``.
    """
    sql = ('select b.id,b.bid_month,b.system_time,'
           'b.lowest_price_time as real_lowest_price_time,'
           'b.lowest_price as real_lowest_price,'
           'a.alert_price,a.avg_price,a.bid_people_num,a.license_num,'
           'a.lowest_price,a.lowest_price_time,a.lowest_price_time as tmp '
           'from t_bid_summary as a left join t_bid_data as b '
           'on a.bid_month=b.bid_month where 1=1 ')
    params = []
    if startTime is not None:
        sql += ' and system_time>=%s'
        params.append(startTime)
    if endTime is not None:
        sql += ' and system_time<=%s'
        params.append(endTime)
    sql += ' ORDER BY a.bid_month desc,b.system_time ASC'

    header = ("id", "bid_month", "system_time", "real_lowest_price_time",
              "real_lowest_price", "alert_price", "avg_price",
              "bid_people_num", "license_num", "lowest_price",
              "lowest_price_time", "lowest_price_time_order", "result")

    auth = confidentials.getMySqlAuth()
    conn = pymysql.connect(host=auth[0], user=auth[1], passwd=auth[2], db=auth[3])
    try:
        conn.set_charset("utf8")
        cur = conn.cursor()
        try:
            cur.execute("set names utf8;")
            cur.execute("set character set utf8;")
            cur.execute("set character_set_connection=utf8;")
            # Pass None when there are no bounds so no %-interpolation happens.
            cur.execute(sql, params or None)

            with open("../data/" + fileName, "w+", newline='') as csvFile:
                writer = csv.writer(csvFile)
                writer.writerow(header)
                for row in cur.fetchall():
                    print(row)
                    if row[0] is None:
                        continue
                    row = list(row)

                    # bid_month 201612 is converted to 16.12
                    bid_month = str(row[1])
                    row[1] = bid_month[2:4] + "." + bid_month[-2:]

                    # system_time 11:29:30 is converted to 9.30; a value
                    # ending in 00 (11:30:00) is converted to 9.60
                    system_time = str(row[2])
                    if system_time[-2:] == "00":
                        row[2] = "9.60"
                    else:
                        row[2] = system_time[-4:].replace(":", ".")

                    row[3] = str(row[3])[-4:].replace(":", ".")

                    lowest_price_time = str(row[-1])
                    row[-2] = lowest_price_time[:8][-4:].replace(":", ".")
                    row[-1] = lowest_price_time[10:-1]

                    # result: 1 when this second already shows the final price
                    row.append(1 if row[4] == row[9] else 0)
                    writer.writerow(row)
        finally:
            cur.close()
    finally:
        conn.close()
def getAllBidData():
    """Export joined bid rows to ``../data/bid_data_all.txt`` as tab-separated text.

    Selects ``t_bid_data`` rows between ``11:29:30`` and ``11:29:59`` joined
    to ``t_bid_summary`` rows with ``alert_price > 0``, ordered by
    ``bid_month`` descending then ``system_time`` ascending. Each output line
    holds: id, bid_month, the third ``:``-separated field of ``system_time``,
    lowest_price, alert_price, bid_people_num, license_num and the summary's
    lowest_price.

    The cursor, connection and output file are released even on failure, and
    the output is assembled with ``join`` instead of repeated concatenation.

    Raises:
        TypeError: If ``bid_month`` is not returned as ``str`` (it is joined
            without conversion, as in the original).
    """
    sql = ('select a.bid_month,a.system_time,a.lowest_price,b.alert_price,'
           'b.bid_people_num,b.license_num,b.lowest_price as final_lowest_price,'
           'a.id from t_bid_data as a left join t_bid_summary as b '
           'on a.bid_month=b.bid_month where b.alert_price>0 '
           'and a.system_time>="11:29:30" and a.system_time<="11:29:59" '
           'order by a.bid_month desc,a.system_time asc')

    auth = confidentials.getMySqlAuth()
    conn = pymysql.connect(host=auth[0], user=auth[1], passwd=auth[2], db=auth[3])
    try:
        cur = conn.cursor()
        try:
            cur.execute('use ' + auth[3])
            cur.execute(sql)
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    lines = []
    for r in rows:
        print(r)
        second = r[1].split(':')[2]
        fields = [str(r[7]), r[0], second, str(r[2]), str(r[3]),
                  str(r[4]), str(r[5]), str(r[6])]
        lines.append('\t'.join(fields) + '\n')

    with open('../data/bid_data_all.txt', 'w') as data_file:
        data_file.write(''.join(lines))
def saveSummaryToCSV():
    """Export ``t_bid_summary`` to ``../data/summary.csv``.

    Writes a header and then one line per summary row (``bid_month``,
    ``alert_price``, ``bid_people_num``, ``license_num``, ``lowest_price``),
    newest ``bid_month`` first. Rows whose ``alert_price`` equals ``0`` are
    skipped.

    The cursor, connection and output file are released even on failure.
    """
    sql = ('select bid_month, alert_price,bid_people_num,license_num,lowest_price '
           'from t_bid_summary order by bid_month desc')

    auth = confidentials.getMySqlAuth()
    conn = pymysql.connect(host=auth[0], user=auth[1], passwd=auth[2], db=auth[3])
    try:
        conn.set_charset("utf8")
        cur = conn.cursor()
        try:
            cur.execute("set names utf8;")
            cur.execute("set character set utf8;")
            cur.execute("set character_set_connection=utf8;")
            cur.execute(sql)

            with open("../data/summary.csv", mode='w', newline="") as csv_file:
                writer = csv.writer(csv_file)
                writer.writerow(("bid_month", "alert_price", "bid_people_num",
                                 "license_num", "lowest_price"))
                for row in cur.fetchall():
                    if row[1] != 0:
                        writer.writerow(row)
        finally:
            cur.close()
    finally:
        conn.close()
def saveSummaryToDB(summaryData):
    """Insert every record of ``summaryData["rows"]`` into ``t_bid_summary``.

    A record whose ``alert_price`` is ``None`` has it replaced by ``0`` in
    place (the caller's dict is modified, as before). Values are bound as
    query parameters instead of being ``%``-formatted into the statement;
    other ``None`` values are therefore stored as NULL, where the original
    ``%d`` formatting raised. All rows are committed together at the end; on
    failure nothing is committed and the connection is still closed.

    Args:
        summaryData: Mapping with a ``"rows"`` iterable of mappings holding
            the keys ``bid_month``, ``bid_date``, ``alert_price``,
            ``avg_price``, ``bid_people_num``, ``bid_percent``,
            ``license_num``, ``lowest_price`` and ``lowest_price_time``.

    Raises:
        KeyError: If ``"rows"`` or one of the record keys is missing.
    """
    columns = ("bid_month", "bid_date", "alert_price", "avg_price",
               "bid_people_num", "bid_percent", "license_num", "lowest_price",
               "lowest_price_time")
    sql = ("insert into t_bid_summary (bid_month,bid_date,alert_price,"
           "avg_price,bid_people_num,bid_percent,license_num,lowest_price,"
           "lowest_price_time) values (%s,%s,%s,%s,%s,%s,%s,%s,%s)")

    auth = confidentials.getMySqlAuth()
    conn = pymysql.connect(host=auth[0], user=auth[1], passwd=auth[2], db=auth[3])
    try:
        conn.set_charset("utf8")
        cur = conn.cursor()
        try:
            cur.execute("use " + auth[3])
            cur.execute("set names utf8;")
            cur.execute("set character set utf8;")
            cur.execute("set character_set_connection=utf8;")
            for d in summaryData["rows"]:
                if d["alert_price"] is None:
                    d["alert_price"] = 0
                params = tuple(d[c] for c in columns)
                # mogrify renders the statement exactly as execute() sends it.
                print(cur.mogrify(sql, params))
                cur.execute(sql, params)
            # pymysql does not autocommit by default; persist the inserts.
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()
def getJsonOfPriceTimeOfYear(year=2016):
    """Write one year's per-second prices to ``../data/bid_<year>_time_price.json``.

    For each month of ``year`` that has ``t_bid_data`` rows between
    ``11:29:30`` and ``11:29:59``, one object
    ``{"bid_month": ..., "rows": [...]}`` is emitted. Each row carries
    ``system_time``, ``lowest_price`` and ``stand_regress_value`` (converted
    to ``int``), ordered by ``system_time``. Months without rows are omitted.

    The output file is now closed (the original left it open), ``bid_month``
    is bound as a query parameter, and the cursor and connection are closed
    even on failure.

    Args:
        year: Year whose twelve ``bid_month`` values (``YYYYMM``) are read.

    Raises:
        TypeError: If a row's ``stand_regress_value`` is NULL, since it is
            passed to ``int`` (unchanged from the original).
    """
    sql = ('select system_time,lowest_price,stand_regress_value from t_bid_data '
           'where system_time>="11:29:30" and system_time<="11:29:59" '
           'and bid_month=%s order by system_time asc')

    auth = confidentials.getMySqlAuth()
    conn = pymysql.connect(host=auth[0], user=auth[1], passwd=auth[2], db=auth[3])
    data = []
    try:
        cur = conn.cursor()
        try:
            for i in range(1, 13):
                bid_month = "{0}{1:02d}".format(year, i)
                cur.execute(sql, (bid_month,))
                rows = cur.fetchall()
                if not rows:
                    continue
                data.append({
                    "bid_month": bid_month,
                    "rows": [
                        {"system_time": row[0],
                         "lowest_price": row[1],
                         "stand_regress_value": int(row[2])}
                        for row in rows
                    ],
                })
        finally:
            cur.close()
    finally:
        conn.close()

    with open('../data/bid_' + str(year) + '_time_price.json', 'w') as data_file:
        data_file.write(json.dumps(data))
def getBidDataJsonFromDB(yearStart=2014, yearEnd=2016):
    """Export per-second bid data to ``../data/bid_<year>.json``, one file per year.

    Each file holds ``{"months": [...], "rows": [...]}``. ``months`` lists
    every ``bid_month`` of the year that has rows in ``t_bid_data``. ``rows``
    has one entry per second from ``11:29:30`` to ``11:29:59`` of those
    months, with ``bid_month``, ``system_time``, ``lowest_price``,
    ``stand_regress_value`` and ``final_margin_price``.

    A second with no row is emitted with ``lowest_price`` and
    ``stand_regress_value`` of ``0`` and ``final_margin_price`` of ``None``.
    The original indexed the missing row for ``final_margin_price`` and
    raised ``TypeError`` in that case. The cursor and connection, which the
    original never closed, are now always closed.

    Args:
        yearStart: First year to export (inclusive).
        yearEnd: Last year to export (inclusive).

    Raises:
        TypeError: If an existing row's ``stand_regress_value`` is NULL, since
            it is passed to ``int`` (unchanged from the original).
    """
    count_sql = "select count(0) from t_bid_data where bid_month=%s"
    row_sql = ("select lowest_price,stand_regress_value,final_margin_price "
               "from t_bid_data where bid_month=%s and system_time=%s")

    auth = confidentials.getMySqlAuth()
    conn = pymysql.connect(host=auth[0], user=auth[1], passwd=auth[2], db=auth[3])
    try:
        cur = conn.cursor()
        try:
            cur.execute('use ' + auth[3])
            for year in range(yearStart, yearEnd + 1):
                data = {"months": [], "rows": []}
                for month in range(1, 13):
                    bid_month = "{0}{1:02d}".format(year, month)
                    cur.execute(count_sql, (bid_month,))
                    if cur.fetchone()[0] == 0:
                        print('no data from ' + bid_month)
                        continue
                    data["months"].append(bid_month)
                    for second in range(30, 60):
                        system_time = '11:29:' + str(second)
                        cur.execute(row_sql, (bid_month, system_time))
                        row = cur.fetchone()
                        lowest_price = 0
                        stand_regress_value = 0
                        # Stays None when the second has no row at all.
                        final_margin_price = None
                        if row is not None:
                            lowest_price = row[0]
                            stand_regress_value = int(row[1])
                            final_margin_price = row[2]
                        else:
                            print(bid_month + ' ' + system_time + ' no data ')
                        print(bid_month + ' ' + system_time + ' '
                              + str(lowest_price) + ' ' + str(stand_regress_value))
                        data["rows"].append({
                            "bid_month": bid_month,
                            "system_time": system_time,
                            "lowest_price": lowest_price,
                            "stand_regress_value": stand_regress_value,
                            "final_margin_price": final_margin_price,
                        })
                with open('../data/bid_' + str(year) + '.json', 'w') as file_bid_data:
                    file_bid_data.write(json.dumps(data))
        finally:
            cur.close()
    finally:
        conn.close()