def task_getlist(disk_id, path, drive_id):
    """Recursively pull the file listing under *path* on *disk_id* into the
    drive group's Mongo sync-cache collection.

    Folders are cached and descended into on their own thread; files are
    handed to ``task_write`` on their own thread.

    Args:
        disk_id: id of the disk whose listing is fetched.
        path: folder path relative to the drive root; falsy means root.
        drive_id: drive group id; names the ``syn_drive_<id>`` collection.
    """
    path = path or ''
    res = logic.get_one_file_list(disk_id, path)
    thread_list = []  # worker threads spawned for this directory level
    try:
        # MongoDB is lazy: a collection only materializes on first insert,
        # so merely referencing it here has no side effect.
        drivename = "syn_drive_" + str(drive_id)
        collection = MongoDB.db[drivename]
        for item in res["data"]:
            if "folder" in item:
                common.send_socket(
                    drive_id,
                    "{} | 拉取 {} 缓存数据".format(
                        time.strftime('%Y-%m-%d %H:%M:%S'), item["name"]))
                collection.insert_one({
                    "id": item["id"],
                    "parentReference": item["parentReference"]["id"],
                    "name": item["name"],
                    "file": "folder",
                    "path": item["parentReference"]["path"].replace(
                        "/drive/root:", "")
                })
                # Recurse into the sub-folder on its own thread.
                t = threading.Thread(target=task_getlist,
                                     args=(disk_id,
                                           "/" + path + "/" + item["name"],
                                           drive_id))
                thread_list.append(t)
            else:
                t = threading.Thread(target=task_write,
                                     args=(drive_id, item))
                thread_list.append(t)
        for t in thread_list:
            t.start()
        for t in thread_list:
            t.join()
    except Exception:
        # Best-effort retry: re-run this directory level on transient
        # API/DB failures.
        # NOTE(review): retry is unbounded — a persistent error recurses
        # forever; consider adding a retry cap.
        task_getlist(disk_id, path, drive_id)
def task_write(drive_id, data):
    """Insert one file entry into the drive group's sync-cache collection.

    Bug fix: the parameter was named ``disk_id`` while the body referenced
    ``drive_id`` (a NameError at call time); callers (``task_getlist``)
    pass the drive id positionally, so the parameter now matches the body.

    Args:
        drive_id: drive group id; names the ``syn_drive_<id>`` collection.
        data: one file item from the drive listing API.
    """
    common.send_socket(
        drive_id,
        "{} | 入库 {}".format(time.strftime('%Y-%m-%d %H:%M:%S'),
                            data["name"]))
    # MongoDB is lazy: the collection is created on first insert.
    drivename = "syn_drive_" + str(drive_id)
    collection = MongoDB.db[drivename]
    collection.insert_one({
        "id": data["id"],
        "parentReference": data["parentReference"]["id"],
        "name": data["name"],
        "file": data["file"]["mimeType"],
        "path": data["parentReference"]["path"].replace("/drive/root:", ""),
        # Disks this file still has to be synced to; filled by the diff pass.
        "syn_disk": []
    })
def pull_chief_dirve_info(drive_id):
    """Pull the chief drive's listing into the sync cache, diff it against
    the member disks, then download and fan out every file some member disk
    is missing; finally drop the cache collection.

    (The "dirve" typo in the name is kept for caller compatibility.)

    Args:
        drive_id: id of the drive group being synchronized.
    """
    drivename = "syn_drive_" + str(drive_id)
    collectionList = MongoDB.db.list_collection_names()
    chief_id = driveModels.drive_list.find_by_chief(drive_id).id  # 查询主盘
    if drivename not in collectionList:
        # No cache collection yet: pull the chief drive listing, then diff
        # it against every member disk.
        task_getlist(chief_id, '', drive_id)
        contrast_dif(drive_id)  # 差异对比
    collection = MongoDB.db[drivename]
    # NOTE(review): presumably gives the listing threads time to finish
    # their inserts — confirm; a join-based barrier would be sturdier.
    time.sleep(3)
    driveData = getMongoDB(drivename)
    for item in driveData:
        if item["syn_disk"]:
            common.send_socket(
                drive_id,
                "{} | 拉取 {} 的下载地址".format(time.strftime('%Y-%m-%d %H:%M:%S'),
                                          item["name"]))
            # Fetch the download URL, with one retry on failure.
            down_info = pull_dirve_file(chief_id, item["id"])
            if down_info is None:
                down_info = pull_dirve_file(chief_id, item["id"])
            if down_info is None:
                # Still unavailable: skip this entry instead of crashing on
                # None; it stays cached for a later run.
                continue
            common.send_socket(
                drive_id,
                "{} | 拉取 {} 的下载地址完成".format(
                    time.strftime('%Y-%m-%d %H:%M:%S'), item["name"]))
            # Download the file locally.
            down_result = down.down_file(down_info["url"], down_info["name"],
                                         drive_id)
            if down_result:
                common.send_socket(
                    drive_id,
                    "{} | 开始同步 {}".format(time.strftime('%Y-%m-%d %H:%M:%S'),
                                          item["name"]))
                # Upload to every member disk flagged in syn_disk.
                uploads.upProcess(drive_id, down_info["name"], item["path"])
                common.send_socket(
                    drive_id,
                    "{} | 同步 {} 完成,删除同步缓存数据ID".format(
                        time.strftime('%Y-%m-%d %H:%M:%S'), item["name"]))
                # Remove the finished entry from the cache collection.
                collection.delete_one({"id": item["id"]})
    # Clear and drop the cache collection. Collection.remove() was removed
    # in PyMongo 4; delete_many({}) is the supported equivalent (and is
    # redundant right before drop(), kept only to mirror the original
    # two-step teardown).
    MongoDB.db[drivename].delete_many({})
    MongoDB.db[drivename].drop()
    common.send_socket(drive_id,
                       "{} | 同步完成!".format(time.strftime('%Y-%m-%d %H:%M:%S')))
def download(self):
    """Download this worker's byte range into the shared output file.

    Issues a ranged GET for [startpos, endpos], seeks the duplicated file
    handle to startpos, writes the raw bytes, and closes the handle.
    """
    common.send_socket(
        self.drive_id,
        "{} | 开始下载进程 {} | {}".format(time.strftime('%Y-%m-%d %H:%M:%S'),
                                     self.getName(), time.time()))
    range_header = {"Range": "bytes=%s-%s" % (self.startpos, self.endpos)}
    response = requests.get(self.url, headers=range_header)
    # response.content is the raw byte payload (response.text would decode
    # it to str), so it is written to the file verbatim.
    self.fd.seek(self.startpos)
    self.fd.write(response.content)
    common.send_socket(
        self.drive_id,
        "{} | 结束下载进程 {} | {}".format(time.strftime('%Y-%m-%d %H:%M:%S'),
                                     self.getName(), time.time()))
    self.fd.close()
def thread_dif_one(name, path, drive_id, disk_id):
    """Flag (name, path) for syncing to *disk_id* when that disk's cache
    collection has no matching entry."""
    disk_cache = MongoDB.db["drive_" + str(disk_id)]
    if disk_cache.find_one({"name": name, "path": path}) is not None:
        return  # the disk already has this file — nothing to flag
    syn_cache = MongoDB.db["syn_drive_" + str(drive_id)]
    common.send_socket(
        drive_id,
        "{} | 发现差异,Disk_id: {}".format(time.strftime('%Y-%m-%d %H:%M:%S'),
                                       disk_id))
    entry = syn_cache.find_one({"name": name, "path": path})
    pending = entry["syn_disk"]
    if disk_id not in pending:
        pending.append(disk_id)
        syn_cache.update_one({"name": name, "path": path},
                             {"$set": {"syn_disk": pending}})
def contrast_dif(drive_id):
    """Diff the chief drive's sync cache against every member disk's cache
    and record, per file, which disks still need it."""
    common.send_socket(
        drive_id, "{} | 开始对比差异".format(time.strftime('%Y-%m-%d %H:%M:%S')))
    members = driveModels.drive_list.find_by_drive_id(drive_id)
    # Cache collection names for every non-chief member disk.
    cache_names = ["drive_" + str(m.id) for m in members if m.chief != "1"]
    for cache_name in cache_names:
        if cache_name in MongoDB.db.list_collection_names():
            # The disk has a cache collection: diff it entry by entry.
            contrast_dif_one_disk(cache_name, drive_id)
            continue
        # No cache collection at all: every non-folder entry is missing
        # from this disk and must be flagged for sync.
        disk_id = cache_name.replace("drive_", "")
        common.send_socket(
            drive_id,
            "{} | 发现差异,Disk_id: {}".format(
                time.strftime('%Y-%m-%d %H:%M:%S'), disk_id))
        syn_cache = MongoDB.db["syn_drive_" + str(drive_id)]
        for entry in syn_cache.find():
            if entry["file"] == "folder":
                continue
            pending = entry["syn_disk"]
            if disk_id not in pending:
                pending.append(disk_id)
                syn_cache.update_one(
                    {"name": entry["name"], "path": entry["path"]},
                    {"$set": {"syn_disk": pending}})
    common.send_socket(
        drive_id, "{} | 对比差异完成".format(time.strftime('%Y-%m-%d %H:%M:%S')))
def upProcess(drive_id, fileName, remotePath):
    """Upload the locally cached *fileName* to every member disk listed in
    its ``syn_disk`` entry, then delete the local temp copy.

    Fixes: removed a duplicated inner ``item.chief == "0"`` check (the
    branch had already established it) and an unused ``res`` local; the
    temp path is built once with ``os.path.join``.

    Args:
        drive_id: drive group id.
        fileName: file name inside the temp sync directory.
        remotePath: destination folder path on the remote drives.
    """
    members = driveModels.drive_list.find_by_drive_id(drive_id)
    collection = MongoDB.db["syn_drive_" + str(drive_id)]
    local_path = os.path.join(os.getcwd(), "temp_uploads", "syn_temp",
                              str(drive_id), fileName)
    for item in members:
        if item.chief != "0":
            continue  # skip the chief drive itself
        dbRes = collection.find_one({"name": fileName, "path": remotePath})
        if str(item.id) not in dbRes["syn_disk"]:
            continue  # this disk already has the file
        common.send_socket(
            drive_id,
            "{} | 开始同步 {} 到 [ {} ] 网盘".format(
                time.strftime('%Y-%m-%d %H:%M:%S'), fileName, item.title))
        # 4 MiB is the simple-upload cap; larger files use an upload session.
        filesize = os.path.getsize(local_path)
        if filesize > 4194304:
            putfilebig(item.id, drive_id, fileName, remotePath)
        else:
            putfilesmall(item.id, drive_id, fileName, remotePath)
        common.send_socket(
            drive_id,
            "{} | 同步 {} 到 [ {} ] 网盘完成".format(
                time.strftime('%Y-%m-%d %H:%M:%S'), fileName, item.title))
    common.send_socket(
        drive_id,
        "{} | {} 所有网盘同步完成,删除缓存文件".format(time.strftime('%Y-%m-%d %H:%M:%S'),
                                        fileName))
    os.remove(local_path)  # 删除文件
def down_file(url, fileName, drive_id):
    """Download *url* into the drive's temp sync directory with ranged,
    multi-threaded GET requests.

    Fixes over the original:
      * the HEAD-failure retry now RETURNS its result (the original fell
        through and crashed on an unbound ``filesize``);
      * range ends are clamped to ``filesize - 1`` (HTTP byte ranges are
        inclusive);
      * the semaphore is sized to the actual chunk count — it is acquired
        once per chunk and never released, so a smaller bound deadlocked
        whenever the memory heuristic produced more than 5 chunks;
      * ``step`` is floored at 1 so tiny files cannot loop forever.

    Args:
        url: direct download URL.
        fileName: target file name.
        drive_id: drive group id; selects the temp sub-directory.

    Returns:
        True once the file is fully written.
    """
    global semlock
    filename = "{}/temp_uploads/syn_temp/{}/{}".format(os.getcwd(), drive_id,
                                                       fileName)
    temp_dir = "{}/temp_uploads/syn_temp/{}".format(os.getcwd(), drive_id)
    if not os.path.exists(temp_dir):
        common.send_socket(
            drive_id,
            "{} | 创建网盘同步临时缓存目录".format(time.strftime('%Y-%m-%d %H:%M:%S')))
        os.makedirs(temp_dir)
    try:
        filesize = int(requests.head(url).headers['Content-Length'])
    except Exception:
        # HEAD failed (network hiccup / missing Content-Length): retry and
        # return the retry's result.
        # NOTE(review): unbounded retry — consider a cap.
        return down_file(url, fileName, drive_id)
    common.send_socket(
        drive_id,
        "{} | 多线程下载 {} | 文件大小: {}".format(time.strftime('%Y-%m-%d %H:%M:%S'),
                                          fileName, filesize))
    threadnum = 5
    # Memory heuristic: shrink the per-chunk size when a chunk would not
    # fit in free memory (each worker buffers its whole chunk in RAM).
    step = filesize // threadnum - 100000000
    if step > psutil.virtual_memory().free // threadnum:
        divisor = threadnum
        while step > psutil.virtual_memory().free // threadnum:
            step = filesize // divisor
            divisor += 1
    else:
        step = filesize // threadnum
    step = max(step, 1)  # guard: step 0 (tiny file) would never progress
    # The semaphore is acquired once per chunk and never released, so its
    # bound must cover every chunk or the loop deadlocks.
    chunks = max(threadnum, -(-filesize // step))
    semlock = threading.BoundedSemaphore(chunks)
    # Truncate/create the target file, then reopen it for positioned writes.
    open(filename, 'w').close()
    mtd_list = []
    start = 0
    end = -1
    with open(filename, 'rb+') as f:
        fileno = f.fileno()
        # Byte ranges are inclusive: the last valid offset is filesize - 1,
        # so the loop ends once end reaches it.
        while end < filesize - 1:
            semlock.acquire()
            start = end + 1
            end = min(start + step - 1, filesize - 1)
            # Each worker gets its own duplicated handle so concurrent
            # seek/write pairs do not interfere.
            fd = os.fdopen(os.dup(fileno), 'rb+', -1)
            t = MulThreadDownload(drive_id, url, start, end, fd)
            t.start()
            mtd_list.append(t)
        for t in mtd_list:
            t.join()
    common.send_socket(
        drive_id,
        "{} | 下载 {} 完成".format(time.strftime('%Y-%m-%d %H:%M:%S'), fileName))
    return True  # single-file download complete


# if __name__ == "__main__":
#     url =
'https://splogs-my.sharepoint.com/personal/test_my365_ws/_layouts/15/download.aspx?UniqueId=76f5ee8a-c0fc-48ac-ba27-9a2c50355d74&Translate=false&tempauth=eyJ0eXAiOiJKV1QiLCJhbGciOiJub25lIn0.eyJhdWQiOiIwMDAwMDAwMy0wMDAwLTBmZjEtY2UwMC0wMDAwMDAwMDAwMDAvc3Bsb2dzLW15LnNoYXJlcG9pbnQuY29tQGRlOWMyMzNkLTM3Y2MtNDQ2Yy1hNWVlLTI1ZjBmMDZhNGZiYiIsImlzcyI6IjAwMDAwMDAzLTAwMDAtMGZmMS1jZTAwLTAwMDAwMDAwMDAwMCIsIm5iZiI6IjE1NTM0MDM1MDIiLCJleHAiOiIxNTUzNDA3MTAyIiwiZW5kcG9pbnR1cmwiOiJmRlg5dUc5UzFCRUpZbTM5MERhNUFNYkJmMmIveDhmRkVhRFlsZlQ3YnBJPSIsImVuZHBvaW50dXJsTGVuZ3RoIjoiMTQzIiwiaXNsb29wYmFjayI6IlRydWUiLCJjaWQiOiJObVZsWm1Nek1XRXROelppT0MwME1HVTJMVGd3TWprdE9UTTRNRFZrTkRZME5EUTUiLCJ2ZXIiOiJoYXNoZWRwcm9vZnRva2VuIiwic2l0ZWlkIjoiTUdaak9UbGxNekl0WTJSaFpTMDBOVFV6TFdJeVltWXRZV1U0WlRreU16VXdORGt4IiwiYXBwX2Rpc3BsYXluYW1lIjoidGVzdGFwcCIsImFwcGlkIjoiM2I3OWQ5NGYtYThiOC00ZWU0LWI3ZDctNDFlMjY5NzkwNjM4IiwidGlkIjoiZGU5YzIzM2QtMzdjYy00NDZjLWE1ZWUtMjVmMGYwNmE0ZmJiIiwidXBuIjoidGVzdEBteTM2NS53cyIsInB1aWQiOiIxMDAzMjAwMDQwNUQ2RTJBIiwiY2FjaGVrZXkiOiIwaC5mfG1lbWJlcnNoaXB8MTAwMzIwMDA0MDVkNmUyYUBsaXZlLmNvbSIsInNjcCI6ImFsbGZpbGVzLndyaXRlIiwidHQiOiIyIiwidXNlUGVyc2lzdGVudENvb2tpZSI6bnVsbH0.eFgzMHFFaXJUQkZ6bEt3bThGaFBQMlZsL01HUWdNR2wwbjdVQzNsOUZEQT0&ApiVersion=2.0' # fileName = 'douyintest1.mp4' # down_file(url, fileName)