class DouyinDownloader(object): """docstring for DouyinDownloader""" def __init__(self, download_dir = '/tmp/download/'): super(DouyinDownloader, self).__init__() # self.arg = arg self.downloadCount = 0 self.uploadCount = 0 if download_dir.endswith('/'): self.downloadDir = download_dir else: self.downloadDir = download_dir + '/' # redis connect self.redisClient = redis.StrictRedis(host='115.159.157.98', port=17379, db=0, password='******') # mysql self.mysqlDB = Database(host='localhost', user='******', passwd='zx#Video2018', database='video') # nextcloud self.oc = owncloud.Client('http://127.0.0.1:18080') self.oc.login('zhangxu', 'zx@12346') self.oc.logout() self.oc2 = owncloud.Client('http://115.29.34.236:18181') self.oc2.login('zhangxu', 'zx@12346') self.oc2.logout() def downloadAll(self): while True: try: task = self.getTask() if task != None: print("get one task, to download") print(task) # 判断是否已经存在 exist = self.videoExist(task) if exist == True: print("video is exist, return") else: self.downloadOne(task) except Exception as e: print("downloadAll error:") print(e) time.sleep(1) def getTask(self): # 从任务队列中取任务 task = self.redisClient.rpop("douyinTask") return task def downloadOne(self, task): nowtime = time.time() filename = str(nowtime) + ".mp4" # dirname = time.strftime("%Y-%m-%d", time.localtime()) filepath = self.downloadDir + filename downloadDirNow = self.downloadDir print("to download video:" + filepath) # prepare dir if os.path.exists(downloadDirNow) == False: os.makedirs(downloadDirNow, 666) # stream=True作用是推迟下载响应体直到访问Response.content属性 res = requests.get(task, stream=True) # 将视频写入文件夹 with open(filepath, 'ab') as f: f.write(res.content) f.flush() self.downloadCount = self.downloadCount + 1 print(filename + '下载完成' + ', count:' + str(self.downloadCount)) # 计算文件md5 file_md5 = self.get_file_md5(filepath) print("file md5:" + file_md5) # 使用md5值作为新文件名 new_file_path = downloadDirNow + file_md5 + ".mp4" videoInfo = self.mysqlDB.get_video_info_by_md5(file_md5) if videoInfo != None: # 文件已经存在 print("video same md5 is exist, return") # 删除文件 os.remove(filepath) else: # 重命名 os.rename(filepath, new_file_path) # upload print("upload file to local cloud.") cloudFilePath = self.uploadFileToCloud(new_file_path, file_md5 + ".mp4", self.oc) # 暂时不上传到远程云了, 网速太慢了。。。 # print("upload file to remote cloud.") # self.uploadFileToCloud(new_file_path, file_md5 + ".mp4", self.oc2) # 创建异步任务copy视频到远程云 # if len(cloudFilePath) > 0: # self.redisClient.lpush('copyTask', cloudFilePath) # 更新上传计数 self.uploadCount = self.uploadCount + 1 print("upload file count:" + str(self.uploadCount)) # 信息插入数据库 urlmd5 = hashlib.md5(task).hexdigest() result,msg = self.mysqlDB.insert_video_info(platform=1, status=0, title='', url=str(task, encoding = "utf-8") , md5=file_md5, urlmd5=urlmd5, storepath=new_file_path) print("insert_video_info:" + result) # 删除文件 os.remove(new_file_path) def videoExist(self, task): # 计算url的md5值,之后根据该值在数据库中查询, 从而判断该视频是否已经存在 urlmd5 = hashlib.md5(task).hexdigest() # 根据urlMd5 查询视频信息 videoInfo = self.mysqlDB.get_video_info(urlmd5) if videoInfo != None: return True return False def get_file_md5(self, file_path): f = open(file_path,'rb') md5_obj = hashlib.md5() while True: d = f.read(8096) if not d: break md5_obj.update(d) hash_code = md5_obj.hexdigest() f.close() md5 = str(hash_code).lower() return md5 def uploadFileToCloud(self, file_path, file_name, oc): try: oc.login('zhangxu', 'zx@12346') dirname = time.strftime("%Y-%m-%d", time.localtime()) dirname = dirname + '-%d' %(self.uploadCount/200) try: oc.mkdir(dirname) except Exception as e: print("mkdir failed:" + str(e)) # upload cloudFilePath = dirname + '/' + file_name oc.put_file(cloudFilePath, file_path) # logout oc.logout() print("uploadFileToCloud success, file:" + cloudFilePath) return cloudFilePath except Exception as e: print("uploadFileToCloud error:") print(e) return ""
class DouyinDownloader(object): """docstring for DouyinDownloader""" def __init__(self, download_dir='/tmp/download/'): super(DouyinDownloader, self).__init__() # self.arg = arg self.downloadCount = 0 self.uploadCount = 0 if download_dir.endswith('/'): self.downloadDir = download_dir else: self.downloadDir = download_dir + '/' # redis connect self.redisClient = redis.StrictRedis(host='115.159.157.98', port=17379, db=0, password='******') # mysql self.mysqlDB = Database(host='localhost', user='******', passwd='zx#Video2018', database='video') # nextcloud self.oc = owncloud.Client('http://127.0.0.1:18080') self.oc.login('zhangxu', 'zx@12346') self.oc.logout() self.oc2 = owncloud.Client('http://115.29.34.236:18181') self.oc2.login('zhangxu', 'zx@12346') self.oc2.logout() def downloadAll(self): while True: try: task = self.getTask() if task != None: print("get one task, to download") print(task) # 判断是否已经存在 exist = self.videoExist(task) if exist == True: print("video is exist, return") else: self.downloadOne(task) except Exception as e: print("downloadAll error:") print(e) time.sleep(1) def getTask(self): # 从任务队列中取任务 task = self.redisClient.rpop("douyinTask") return task def downloadOne(self, task): nowtime = time.time() filename = str(nowtime) + ".mp4" # dirname = time.strftime("%Y-%m-%d", time.localtime()) filepath = self.downloadDir + filename downloadDirNow = self.downloadDir print("to download video:" + filepath) # prepare dir if os.path.exists(downloadDirNow) == False: os.makedirs(downloadDirNow, 666) # stream=True作用是推迟下载响应体直到访问Response.content属性 res = requests.get(task, stream=True) # 将视频写入文件夹 with open(filepath, 'ab') as f: f.write(res.content) f.flush() self.downloadCount = self.downloadCount + 1 print(filename + '下载完成' + ', count:' + str(self.downloadCount)) # 计算文件md5 file_md5 = self.get_file_md5(filepath) print("file md5:" + file_md5) # 使用md5值作为新文件名 new_file_path = downloadDirNow + file_md5 + ".mp4" videoInfo = self.mysqlDB.get_video_info_by_md5(file_md5) if videoInfo != None: # 文件已经存在 print("video same md5 is exist, return") # 删除文件 os.remove(filepath) else: # 重命名 os.rename(filepath, new_file_path) # upload print("upload file to local cloud.") cloudFilePath = self.uploadFileToCloud(new_file_path, file_md5 + ".mp4", self.oc) # 暂时不上传到远程云了, 网速太慢了。。。 # print("upload file to remote cloud.") # self.uploadFileToCloud(new_file_path, file_md5 + ".mp4", self.oc2) # 创建异步任务copy视频到远程云 # if len(cloudFilePath) > 0: # self.redisClient.lpush('copyTask', cloudFilePath) # 更新上传计数 self.uploadCount = self.uploadCount + 1 print("upload file count:" + str(self.uploadCount)) # 信息插入数据库 urlmd5 = hashlib.md5(task).hexdigest() result, msg = self.mysqlDB.insert_video_info( platform=1, status=0, title='', url=str(task, encoding="utf-8"), md5=file_md5, urlmd5=urlmd5, storepath=new_file_path) print("insert_video_info:" + result) # 删除文件 os.remove(new_file_path) def videoExist(self, task): # 计算url的md5值,之后根据该值在数据库中查询, 从而判断该视频是否已经存在 urlmd5 = hashlib.md5(task).hexdigest() # 根据urlMd5 查询视频信息 videoInfo = self.mysqlDB.get_video_info(urlmd5) if videoInfo != None: return True return False def get_file_md5(self, file_path): f = open(file_path, 'rb') md5_obj = hashlib.md5() while True: d = f.read(8096) if not d: break md5_obj.update(d) hash_code = md5_obj.hexdigest() f.close() md5 = str(hash_code).lower() return md5 def uploadFileToCloud(self, file_path, file_name, oc): try: oc.login('zhangxu', 'zx@12346') dirname = time.strftime("%Y-%m-%d", time.localtime()) dirname = dirname + '-%d' % (self.uploadCount / 200) try: oc.mkdir(dirname) except Exception as e: print("mkdir failed:" + str(e)) # upload cloudFilePath = dirname + '/' + file_name oc.put_file(cloudFilePath, file_path) # logout oc.logout() print("uploadFileToCloud success, file:" + cloudFilePath) return cloudFilePath except Exception as e: print("uploadFileToCloud error:") print(e) return ""