def getInfor():
    """Fetch the GetTopRate list and store up to ten rows in MySQL.

    Requests the GetTopRate endpoint, parses the response as JSON and
    inserts one row per entry into ``DailyAttendenceFromEN`` with today's
    date (``%Y-%m-%d``).  Each row is committed on its own; a failing row is
    rolled back and reported, and the remaining rows are still attempted.

    Relies on module-level ``requests``, ``headers``, ``json``, ``time`` and
    ``connectMySQL``.
    """
    get_url = 'http://www.cbooo.cn/BoxOffice/GetTopRate'
    html = requests.get(get_url, headers=headers).text
    jsonData = json.loads(html)

    date = time.strftime("%Y-%m-%d")
    # Placeholders are filled in by the driver, so quotes inside a movie
    # name can no longer break (or inject into) the statement.
    sql = ("INSERT INTO DailyAttendenceFromEN "
           "(itemDate,movieName,bookingRate,avgPeople,rateLine,peopleLine,mId) "
           "VALUES (%s,%s,%s,%s,%s,%s,%s)")

    # One connection for the whole batch instead of one per row.
    db = connectMySQL()
    try:
        cursor = db.cursor()
        try:
            # Slicing tolerates a response with fewer than ten entries.
            for item in jsonData[:10]:
                data = (date,
                        str(item["MovieName"]),
                        str(item["BookingRate"]),
                        str(item["AvgPeople"]),
                        str(item["RateLine"]),
                        str(item["PeopleLine"]),
                        str(item["MovieID"]))
                print(data)
                try:
                    cursor.execute(sql, data)
                    db.commit()
                    print("commit sucess!")
                except Exception as exc:
                    db.rollback()
                    print("commit error!")
                    print(exc)
        finally:
            cursor.close()
    finally:
        db.close()
def getInfoFromMySQL(dates):
    """Return the average daily box-office total over ``dates`` as an int.

    For every date the ``boxOffice`` values of all ``DailyBoxOffices`` rows
    with that ``itemDate`` are truncated to integers and summed.  The sum of
    those daily totals is divided by the number of dates and truncated.

    Args:
        dates: iterable of date strings (they are concatenated into the
            progress output, so they must be ``str``).

    Returns:
        The truncated average, or ``0`` when ``dates`` is empty.
    """
    sql = "SELECT boxOffice FROM DailyBoxOffices WHERE itemDate = %s"
    dayCount = 0
    totalBoxOfficeSum = 0

    db = connectMySQL()
    try:
        cursor = db.cursor()
        try:
            for date in dates:
                dayCount += 1
                try:
                    cursor.execute(sql, (date,))
                except Exception as exc:
                    print("Error to execute " + sql
                          + " [itemDate=" + str(date) + "]: " + str(exc))
                    # A failed query contributes nothing for this day, but
                    # the day still counts towards the average.
                    rows = ()
                else:
                    rows = cursor.fetchall()

                dailyBoxOfficeSum = sum(int(float(row[0])) for row in rows)
                totalBoxOfficeSum += dailyBoxOfficeSum
                print(date + " " + str(dailyBoxOfficeSum) + " "
                      + str(totalBoxOfficeSum))
        finally:
            cursor.close()
    finally:
        db.close()

    if dayCount == 0:
        # No dates at all: avoid dividing by zero.
        return 0
    return int(totalBoxOfficeSum / dayCount)
def getInfor():
    """Fetch the GetHourBoxOffice list and store up to eleven rows in MySQL.

    Requests the GetHourBoxOffice endpoint, parses the response as JSON and
    inserts one row per entry of ``data2`` into ``RealtimeDatasFromEN`` with
    today's date (``%Y-%m-%d``).  Each row is committed on its own; a failing
    row is rolled back and reported, and the remaining rows are still
    attempted.

    Relies on module-level ``requests``, ``headers``, ``json``, ``time`` and
    ``connectMySQL``.
    """
    get_url = 'http://www.cbooo.cn//BoxOffice/GetHourBoxOffice'
    html = requests.get(get_url, headers=headers).text
    jsonData = json.loads(html)
    itemDatas = jsonData["data2"]

    date = time.strftime("%Y-%m-%d")
    # `rank` is back-quoted because RANK is a reserved word in MySQL 8.0.2+.
    # Placeholders let the driver escape the values.
    sql = ("INSERT INTO RealtimeDatasFromEN "
           "(itemDate,`rank`,movieName,boxOffice,sumBoxOffice,"
           "boxProportion,movieDay,mId) "
           "VALUES (%s,%s,%s,%s,%s,%s,%s,%s)")

    # One connection for the whole batch instead of one per row.
    db = connectMySQL()
    try:
        cursor = db.cursor()
        try:
            # Slicing tolerates a response with fewer than eleven entries.
            for item in itemDatas[:11]:
                data = (str(date),
                        str(item['Irank']),
                        str(item['MovieName']),
                        str(item['BoxOffice']),
                        str(item['sumBoxOffice']),
                        str(item['boxPer']),  # box-office share
                        str(item['movieDay']),
                        str(item['mId']))
                print(data)
                try:
                    cursor.execute(sql, data)
                    db.commit()
                    print("commit sucess!")
                except Exception as exc:
                    db.rollback()
                    print("commit error!")
                    print(exc)
        finally:
            cursor.close()
    finally:
        db.close()
def drawByDates(beginDate, endDate):
    """Plot one box-office value per day between two dates.

    ``dateRange(beginDate, endDate)`` supplies the dates; for each one
    ``getInforByDate(date, db)`` is converted to ``int``.  The values are
    drawn as a red line with the y axis fixed to 0..200000 and the figure is
    shown with ``plt.show()``.

    Args:
        beginDate: start date as a string (it is concatenated into the title).
        endDate: end date as a string (it is concatenated into the title).
    """
    # Data retrieval starts here.
    db = connectMySQL()
    try:
        dates = dateRange(beginDate, endDate)
        boxOffices = [int(getInforByDate(date, db)) for date in dates]
    finally:
        # Release the connection even when a lookup fails.
        db.close()

    # Plotting starts here.
    print(dates)
    print(boxOffices)
    plt.xlabel("Date(day)")  # x-axis label
    plt.ylabel("BoxOffices(WanYuan)")  # y-axis label
    plt.title(beginDate + " to " + endDate + " dialy Box-Office")  # title
    plt.plot(dates, boxOffices, color="red", linewidth=2)
    plt.ylim(0, 200000)
    plt.grid(True)
    plt.show()
def createTable():
    """Drop and recreate the ``DailyAttendenceFromEN`` table.

    Any existing table of that name is dropped first, so its rows are lost.
    A failure of the CREATE statement is reported on stdout rather than
    raised; a failure of the DROP statement propagates to the caller.
    """
    db = connectMySQL()
    try:
        cursor = db.cursor()
        try:
            cursor.execute("DROP TABLE IF EXISTS DailyAttendenceFromEN")
            sql = """CREATE TABLE DailyAttendenceFromEN(
                itemID INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
                itemDate DATE NOT NULL,
                movieName VARCHAR(45) NOT NULL,
                bookingRate CHAR(10),
                avgPeople CHAR(10),
                rateLine CHAR(10),
                peopleLine CHAR(10),
                mId CHAR(10))"""
            try:
                cursor.execute(sql)
                print("successed to create table!\n")
            except Exception as exc:
                print("error to create table!\n")
                print(exc)
        finally:
            cursor.close()
    finally:
        db.close()
def createTable():
    """Drop and recreate the ``MovieType`` table.

    Any existing table of that name is dropped first, so its rows are lost.
    A failure of the CREATE statement is reported on stdout rather than
    raised; a failure of the DROP statement propagates to the caller.
    """
    db = connectMySQL()
    try:
        cursor = db.cursor()
        try:
            cursor.execute("DROP TABLE IF EXISTS MovieType")
            sql = """
                CREATE TABLE MovieType(
                itemID INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
                movieName VARCHAR(45) NOT NULL,
                boxOffice VARCHAR(45) NOT NULL,
                firstType CHAR(10) NOT NULL,
                secondType CHAR(10),
                thirdType CHAR(10))
                """
            try:
                cursor.execute(sql)
                print("successed to create table!\n")
            except Exception as exc:
                print("error to create table!\n")
                print(exc)
        finally:
            cursor.close()
    finally:
        db.close()
def createTable():
    """Drop and recreate the ``RealtimeDatasFromEN`` table.

    Any existing table of that name is dropped first, so its rows are lost.
    A failure of the CREATE statement is reported on stdout rather than
    raised; a failure of the DROP statement propagates to the caller.
    """
    db = connectMySQL()
    try:
        cursor = db.cursor()
        try:
            cursor.execute("DROP TABLE IF EXISTS RealtimeDatasFromEN")
            # `rank` is back-quoted because RANK is a reserved word in
            # MySQL 8.0.2+ and would otherwise make the statement fail there.
            sql = """CREATE TABLE RealtimeDatasFromEN(
                itemID INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
                itemDate DATE NOT NULL,
                `rank` CHAR(10),
                movieName VARCHAR(45) NOT NULL,
                boxOffice CHAR(10),
                sumBoxOffice CHAR(10),
                boxProportion CHAR(10),
                movieDay CHAR(10),
                mId CHAR(10))"""
            try:
                cursor.execute(sql)
                print("successed to create table!\n")
            except Exception as exc:
                print("error to create table!\n")
                print(exc)
        finally:
            cursor.close()
    finally:
        db.close()
def storeInfor():
    """Load ``../data/movie_details.txt`` into the ``MovieType`` table.

    Each line is split on ``,``.  Field 0 is the movie name, field 4 the box
    office (with the ``万`` unit removed) and field 6 a ``/``-separated list
    of types, of which the first three are stored; missing second or third
    types are stored as ``"--"``.  Lines whose box office is ``"--"`` are
    skipped, as are lines with fewer than seven fields.

    Every row is committed individually; a failing row is rolled back and
    reported, and the remaining lines are still processed.
    """
    sql = ("INSERT INTO MovieType "
           "(movieName,boxOffice,firstType,secondType,thirdType) "
           "VALUES (%s,%s,%s,%s,%s)")

    with open('../data/movie_details.txt', 'r', encoding='utf-8') as file_reader:
        db = connectMySQL()
        try:
            cursor = db.cursor()
            try:
                for line in file_reader:
                    item = line.split(",")
                    if len(item) < 7:
                        # Blank or truncated line: the fields used below
                        # are not all present.
                        print("skip malformed line", line)
                        continue

                    movieName = item[0]
                    boxOffice = item[4].replace("万", "")
                    # Simple cleaning: drop rows whose box office is missing.
                    if boxOffice == "--":
                        continue

                    # Pad so that absent second/third types become "--".
                    movieType = item[6].split("/") + ["--", "--"]
                    firstType, secondType, thirdType = movieType[:3]

                    data = (str(movieName), str(boxOffice), str(firstType),
                            str(secondType), str(thirdType))
                    print(data)

                    # Insert into the database; the driver escapes values.
                    try:
                        cursor.execute(sql, data)
                        db.commit()
                        print("successed to insert data", data)
                    except Exception as exc:
                        db.rollback()
                        print("fail to insert data", data)
                        print(exc)
            finally:
                cursor.close()
        finally:
            db.close()
#coding:utf-8 #create by zhaotianxiang import pymysql import sys print(sys.getdefaultencoding()) from ConnectToMySQL import connectMySQL db = connectMySQL() #后面的编码格式极其重要,耽误了两个小时,下次一定操作数据的时候一定要注意设置编码的一致 cursor = db.cursor() def createTable(): cursor.execute("DROP TABLE IF EXISTS DailyBoxOffices") sql = """CREATE TABLE ( itemID INT NOT NULL PRIMARY KEY AUTO_INCREMENT, itemDate DATE NOT NULL, movieName VARCHAR(45) NOT NULL, boxOffice CHAR(10), proportion CHAR(10), attendence CHAR(10), releaseDays CHAR(10))""" try: cursor.execute(sql) except: print("error to create table!\n") def storeToDatabase(): file_read=open('../data/movie_erverday_information.txt','r',encoding='utf8') for line in file_read.readlines(): try: item = line.split(',')
def _first_match(pattern, text, default='--'):
    """Return the first capture of ``pattern`` in ``text``, else ``default``."""
    found = re.findall(pattern, text)
    return found[0] if found else default


def _strip_noise(text):
    """Remove carriage returns, newlines, spaces and commas from ``text``."""
    # NOTE(review): the space and comma removals each appear twice, which is
    # redundant as written; the second of each may originally have targeted a
    # different character (e.g. a full-width variant) -- confirm.
    return text.replace('\r', '').replace('\n', '').replace(
        ' ', '').replace(' ', '').replace(',', '').replace(',', '')


def updateData():
    """Scrape the Jinzhun box-office table and store one record per row.

    POSTs to the piaofang168 Jinzhun page, locates the ``gross_table`` table
    inside the ``gross_total`` div, reads the date from its first ``<h1>``
    (the text before ``周``) and walks up to 199 rows after the header row.
    Odd and even rows use different CSS class names, so each parity has its
    own set of patterns.  Every field that cannot be found becomes ``'--'``.

    Each row is turned into a comma-separated string
    ``date,name,daily,total,screenings,attendance,release`` and handed to
    ``storeToDatabase(db, text)`` on a fresh connection.

    Raises:
        AttributeError: if the expected div is missing from the page.
        IndexError: if the table has no ``<h1>`` or no date can be extracted.
    """
    post_url = "http://www.piaofang168.com/index.php/Jinzhun"
    html = requests.post(post_url, headers=headers)
    html_soup = BeautifulSoup(html.text, "html.parser")
    table = html_soup.find('div', attrs={
        'class': 'gross_total'
    }).find('table', attrs={'class': 'gross_table'})
    tr = table.find_all('tr')

    date_data = table.find_all('h1')[0]
    date_str = _strip_noise(str(date_data))
    date = re.findall('<h1>(.*?)周', date_str)[0]
    print(date)

    # Field order: movie name, daily box office, total box office,
    # screening share, attendance rate, days in release.
    odd_patterns = ('-bg">(.*?)<', 'lor2">(.*?)<', 'lor3">(.*?)<',
                    'lor4">(.*?)<', 'lor5">(.*?)<', 'lor6">(.*?)<')
    even_patterns = ('ybg2">(.*?)<', 'c2">(.*?)<', 'c3">(.*?)<',
                     'c4">(.*?)<', 'c5">(.*?)<', 'c6">(.*?)<')

    for index, item_tr in enumerate(tr[1:200], start=1):
        item_td = item_tr.find_all('td')
        contentString = _strip_noise(str(item_td))

        patterns = odd_patterns if index % 2 == 1 else even_patterns
        fields = [_first_match(pattern, contentString) for pattern in patterns]

        # Bring the data into one common comma-separated format.
        text = date + ',' + ','.join(fields)

        db = connectMySQL()
        try:
            storeToDatabase(db, text)
        finally:
            # Close the connection even when storing this row fails.
            db.close()