コード例 #1
0
def getInfor():
    """Fetch the top booking-rate list and store up to ten rows in MySQL.

    Downloads the JSON payload from the cbooo.cn ``GetTopRate`` endpoint
    (using the module-level ``headers``) and inserts the first ten entries
    into ``DailyAttendenceFromEN``, stamped with today's date. Each row is
    committed on its own; a failing row is rolled back and reported without
    stopping the remaining rows.
    """
    get_url = 'http://www.cbooo.cn/BoxOffice/GetTopRate'
    html = requests.get(get_url, headers=headers).text
    jsonData = json.loads(html)
    date = time.strftime("%Y-%m-%d")

    # Placeholders are filled in by the driver, which escapes quotes in
    # movie names instead of letting them break (or inject into) the SQL.
    sql = ("INSERT INTO DailyAttendenceFromEN "
           "(itemDate,movieName,bookingRate,avgPeople,rateLine,peopleLine,mId) "
           "VALUES (%s,%s,%s,%s,%s,%s,%s)")

    # One connection for the whole batch, always released.
    db = connectMySQL()
    try:
        cursor = db.cursor()
        try:
            # Slicing tolerates a response with fewer than ten entries.
            for item in jsonData[:10]:
                data = (date, str(item["MovieName"]), str(item["BookingRate"]),
                        str(item["AvgPeople"]), str(item["RateLine"]),
                        str(item["PeopleLine"]), str(item["MovieID"]))
                print(data)
                try:
                    cursor.execute(sql, data)
                    db.commit()
                    print("commit sucess!")
                except Exception as exc:
                    db.rollback()
                    print("commit error!", exc)
        finally:
            cursor.close()
    finally:
        db.close()
コード例 #2
0
def getInfoFromMySQL(dates):
    """Return the average daily box office over *dates*, truncated to int.

    For every date the ``boxOffice`` values stored in ``DailyBoxOffices``
    are summed; the running totals are printed per date. A date whose query
    fails is reported and contributes zero, but still counts as a day.

    Args:
        dates: iterable of date strings matching the ``itemDate`` column.

    Returns:
        ``int(total / number_of_dates)``, or ``0`` when *dates* is empty.
    """
    # Parameterized so the date value is escaped by the driver.
    sql = "SELECT boxOffice FROM DailyBoxOffices WHERE itemDate = %s"
    dayCount = 0
    totalBoxOfficeSum = 0

    db = connectMySQL()
    try:
        cursor = db.cursor()
        try:
            for date in dates:
                dayCount += 1
                try:
                    cursor.execute(sql, (date,))
                    rows = cursor.fetchall()
                except Exception as exc:
                    print("Error to execute " + sql, date, exc)
                    # Do not read stale or missing results after a failure.
                    rows = ()

                dailyBoxOfficeSum = sum(int(float(row[0])) for row in rows)
                totalBoxOfficeSum += dailyBoxOfficeSum
                print(date + " " + str(dailyBoxOfficeSum) + " " +
                      str(totalBoxOfficeSum))
        finally:
            cursor.close()
    finally:
        db.close()

    # Guard against division by zero when no dates were supplied.
    if dayCount == 0:
        return 0
    return int(totalBoxOfficeSum / dayCount)
コード例 #3
0
def getInfor():
    """Fetch the hourly box-office ranking and store up to eleven rows.

    Downloads the JSON payload from the cbooo.cn ``GetHourBoxOffice``
    endpoint (using the module-level ``headers``), reads the per-movie list
    under ``"data2"`` and inserts its first eleven entries into
    ``RealtimeDatasFromEN``, stamped with today's date. Each row is
    committed on its own; a failing row is rolled back and reported without
    stopping the remaining rows.
    """
    get_url = 'http://www.cbooo.cn//BoxOffice/GetHourBoxOffice'
    html = requests.get(get_url, headers=headers).text
    jsonData = json.loads(html)
    itemDatas = jsonData["data2"]
    date = time.strftime("%Y-%m-%d")

    # `rank` is back-quoted because RANK is a reserved word in MySQL 8.0.2+.
    # Placeholders let the driver escape the values.
    sql = ("INSERT INTO RealtimeDatasFromEN "
           "(itemDate,`rank`,movieName,boxOffice,sumBoxOffice,"
           "boxProportion,movieDay,mId) "
           "VALUES (%s,%s,%s,%s,%s,%s,%s,%s)")

    # One connection for the whole batch, always released.
    db = connectMySQL()
    try:
        cursor = db.cursor()
        try:
            # Slicing tolerates a response with fewer than eleven entries.
            for item in itemDatas[:11]:
                # boxPer is the movie's share of the box office.
                data = (str(date), str(item['Irank']), str(item['MovieName']),
                        str(item['BoxOffice']), str(item['sumBoxOffice']),
                        str(item['boxPer']), str(item['movieDay']),
                        str(item['mId']))
                print(data)
                try:
                    cursor.execute(sql, data)
                    db.commit()
                    print("commit sucess!")
                except Exception as exc:
                    db.rollback()
                    print("commit error!", exc)
        finally:
            cursor.close()
    finally:
        db.close()
コード例 #4
0
def drawByDates(beginDate, endDate):
    """Plot the daily box office between *beginDate* and *endDate*.

    The dates come from ``dateRange(beginDate, endDate)``; for each one
    ``getInforByDate(date, db)`` supplies the value, which is converted to
    ``int``. The series is printed and then shown as a red line chart.

    Args:
        beginDate: first date, as a string (it is concatenated into the title).
        endDate: last date, as a string.
    """
    # Collect the data; the connection is released even if a lookup fails.
    db = connectMySQL()
    try:
        dates = dateRange(beginDate, endDate)
        boxOffices = [int(getInforByDate(date, db)) for date in dates]
    finally:
        db.close()

    # Draw the chart.
    print(dates)
    print(boxOffices)
    plt.xlabel("Date(day)")  # X-axis label
    plt.ylabel("BoxOffices(WanYuan)")  # Y-axis label
    plt.title(beginDate + " to " + endDate + " dialy Box-Office")  # title
    plt.plot(dates, boxOffices, color="red", linewidth=2)
    plt.ylim(0, 200000)
    plt.grid(True)
    plt.show()
コード例 #5
0
def createTable():
    """Drop and recreate the ``DailyAttendenceFromEN`` table.

    Any existing table (and its data) is dropped first. A failure of the
    CREATE statement is reported on stdout together with its cause; the
    cursor and connection are always released.
    """
    sql = """CREATE TABLE DailyAttendenceFromEN(
    itemID INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
    itemDate DATE NOT NULL,
    movieName VARCHAR(45) NOT NULL,
    bookingRate CHAR(10),
    avgPeople CHAR(10),
    rateLine CHAR(10),
    peopleLine CHAR(10),
    mId CHAR(10))"""

    db = connectMySQL()
    try:
        cursor = db.cursor()
        try:
            cursor.execute("DROP TABLE IF EXISTS DailyAttendenceFromEN")
            try:
                cursor.execute(sql)
                print("successed to create table!\n")
            except Exception as exc:
                print("error to create table!\n", exc)
        finally:
            cursor.close()
    finally:
        db.close()
コード例 #6
0
def createTable():
    """Drop and recreate the ``MovieType`` table.

    Any existing table (and its data) is dropped first. A failure of the
    CREATE statement is reported on stdout together with its cause; the
    cursor and connection are always released.
    """
    sql = """
    CREATE TABLE MovieType(
    itemID INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
    movieName VARCHAR(45) NOT NULL,
    boxOffice VARCHAR(45) NOT NULL,
    firstType CHAR(10) NOT NULL,
    secondType CHAR(10),
    thirdType CHAR(10))
    """

    db = connectMySQL()
    try:
        cursor = db.cursor()
        try:
            cursor.execute("DROP TABLE IF EXISTS MovieType")
            try:
                cursor.execute(sql)
                print("successed to create table!\n")
            except Exception as exc:
                print("error to create table!\n", exc)
        finally:
            cursor.close()
    finally:
        db.close()
コード例 #7
0
def createTable():
    """Drop and recreate the ``RealtimeDatasFromEN`` table.

    Any existing table (and its data) is dropped first. A failure of the
    CREATE statement is reported on stdout together with its cause; the
    cursor and connection are always released.
    """
    # `rank` is back-quoted because RANK is a reserved word in MySQL 8.0.2+;
    # unquoted it makes the statement a syntax error there.
    sql = """CREATE TABLE RealtimeDatasFromEN(
    itemID INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
    itemDate DATE NOT NULL,
    `rank` CHAR(10),
    movieName VARCHAR(45) NOT NULL,
    boxOffice CHAR(10),
    sumBoxOffice CHAR(10),
    boxProportion CHAR(10),
    movieDay CHAR(10),
    mId CHAR(10))"""

    db = connectMySQL()
    try:
        cursor = db.cursor()
        try:
            cursor.execute("DROP TABLE IF EXISTS RealtimeDatasFromEN")
            try:
                cursor.execute(sql)
                print("successed to create table!\n")
            except Exception as exc:
                print("error to create table!\n", exc)
        finally:
            cursor.close()
    finally:
        db.close()
コード例 #8
0
def storeInfor():
    """Load ``../data/movie_details.txt`` into the ``MovieType`` table.

    Each line is split on commas: field 0 is the movie name, field 4 the
    box office (its "万" suffix is removed) and field 6 a "/"-separated list
    of up to three genres. Lines whose box office is "--" (missing) are
    skipped, as are lines with fewer than seven fields. Missing second or
    third genres are stored as "--". Each row is committed on its own; a
    failing row is rolled back and reported.
    """
    # Placeholders let the driver escape quotes in movie names.
    sql = ("INSERT INTO MovieType "
           "(movieName,boxOffice,firstType,secondType,thirdType) "
           "VALUES (%s,%s,%s,%s,%s)")

    with open('../data/movie_details.txt', 'r', encoding='utf-8') as file_reader:
        db = connectMySQL()
        try:
            cursor = db.cursor()
            try:
                for line in file_reader:
                    item = line.split(",")
                    if len(item) < 7:
                        # Blank or truncated line: field 6 would be missing.
                        print("skip malformed line", repr(line))
                        continue

                    movieName = item[0]
                    boxOffice = item[4].replace("万", "")
                    # Simple cleaning: drop rows with a missing box office.
                    if boxOffice == "--":
                        continue

                    movieType = item[6].split("/")
                    firstType = movieType[0]
                    secondType = movieType[1] if len(movieType) > 1 else "--"
                    thirdType = movieType[2] if len(movieType) > 2 else "--"

                    data = (str(movieName), str(boxOffice), str(firstType),
                            str(secondType), str(thirdType))
                    print(data)

                    # Insert into the database.
                    try:
                        cursor.execute(sql, data)
                        db.commit()
                        print("successed to insert data", data)
                    except Exception as exc:
                        db.rollback()
                        print("fail to insert data", data, exc)
            finally:
                cursor.close()
        finally:
            db.close()
コード例 #9
0
#coding:utf-8 
#create by zhaotianxiang
import pymysql 
import sys
# Print the interpreter's default string encoding at import time.
print(sys.getdefaultencoding())
from ConnectToMySQL import connectMySQL
# Module-level connection, opened at import time.
db = connectMySQL()
# The encoding setting that follows is extremely important (it cost two hours
# of debugging); when working with data, always make sure the encodings are
# set consistently.
# Module-level cursor; createTable() below executes its statements on it.
cursor = db.cursor()

def createTable():
    """Drop and recreate the ``DailyBoxOffices`` table.

    Uses the module-level ``cursor``. Any existing table (and its data) is
    dropped first; a failure of the CREATE statement is reported on stdout
    together with its cause.
    """
    cursor.execute("DROP TABLE IF EXISTS DailyBoxOffices")
    # The table name was missing after CREATE TABLE, which made the statement
    # a syntax error on every run.
    sql = """CREATE TABLE DailyBoxOffices(
    itemID INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
    itemDate DATE NOT NULL,
    movieName VARCHAR(45) NOT NULL,
    boxOffice CHAR(10),
    proportion CHAR(10),
    attendence CHAR(10),
    releaseDays CHAR(10))"""
    try:
        cursor.execute(sql)
    except Exception as exc:
        print("error to create table!\n", exc)
def storeToDatabase():

	file_read=open('../data/movie_erverday_information.txt','r',encoding='utf8')
	for line in file_read.readlines():

		try:
			item = line.split(',')
コード例 #10
0
def _strip_noise(markup):
    """Return *markup* without CR, LF, space, tab and ASCII comma characters."""
    # NOTE(review): the original chained .replace(',', '') twice; presumably
    # one of them was meant for a different comma character - confirm.
    return markup.translate(str.maketrans('', '', '\r\n \t,'))


def _first_match(pattern, text, default='--'):
    """Return the first capture of *pattern* in *text*, or *default* if none."""
    found = re.findall(pattern, text)
    return found[0] if found else default


def updateData():
    """Scrape the piaofang168.com daily table and store each row.

    The page is fetched with a POST (using the module-level ``headers``).
    The date is taken from the table's first ``<h1>`` (the text before
    "周"). Up to 199 rows after the header row are processed; odd and even
    rows use different CSS class suffixes, so each has its own pattern set.
    A field that cannot be found is stored as "--". Every row is passed to
    ``storeToDatabase(db, text)`` as one comma-separated string:
    date, name, daily box office, total box office, screening share,
    attendance, days since release.
    """
    # Patterns in output order: name, daily box office, total box office,
    # screening share, attendance rate, days since release.
    odd_row_patterns = ('-bg">(.*?)<', 'lor2">(.*?)<', 'lor3">(.*?)<',
                        'lor4">(.*?)<', 'lor5">(.*?)<', 'lor6">(.*?)<')
    even_row_patterns = ('ybg2">(.*?)<', 'c2">(.*?)<', 'c3">(.*?)<',
                         'c4">(.*?)<', 'c5">(.*?)<', 'c6">(.*?)<')

    post_url = "http://www.piaofang168.com/index.php/Jinzhun"
    html = requests.post(post_url, headers=headers)
    html_soup = BeautifulSoup(html.text, "html.parser")
    table = html_soup.find('div', attrs={
        'class': 'gross_total'
    }).find('table', attrs={'class': 'gross_table'})
    tr = table.find_all('tr')

    date_str = _strip_noise(str(table.find_all('h1')[0]))
    date = re.findall('<h1>(.*?)周', date_str)[0]
    print(date)

    # Rows are numbered from 1 so that the first data row counts as odd.
    for index, item_tr in enumerate(tr[1:200], start=1):
        contentString = _strip_noise(str(item_tr.find_all('td')))
        patterns = odd_row_patterns if index % 2 == 1 else even_row_patterns
        fields = [_first_match(pattern, contentString) for pattern in patterns]

        # Bring the data into the same comma-separated format.
        text = ','.join([date] + fields)

        db = connectMySQL()
        try:
            storeToDatabase(db, text)
        finally:
            # Release the connection even when storing the row fails.
            db.close()