def upload(self, upload_path, filepath):
    """Upload a local file through ``self.bucket`` in parts, then verify it.

    The file is split into parts, each part is uploaded in order, and the
    multipart upload is completed. Afterwards the stored object is read back
    and compared with the local file; a mismatch is printed and logged but
    not raised.

    If uploading a part or completing the upload raises, the multipart
    upload is aborted (best effort) so its already-uploaded parts are not
    left behind, and the original exception is re-raised.

    Args:
        upload_path: Full destination path (key) of the uploaded file,
            including the file name itself.
        filepath: Path of the local file to upload.
    """
    key = upload_path
    filename = filepath
    total_size = os.path.getsize(filename)
    # determine_part_size decides how large each part should be.
    part_size = determine_part_size(total_size, preferred_size=100 * 1024)

    # Initialise the multipart upload.
    upload_id = self.bucket.init_multipart_upload(key).upload_id
    parts = []
    try:
        # Upload the parts one by one.
        with open(filename, 'rb') as fileobj:
            part_number = 1
            offset = 0
            while offset < total_size:
                num_to_upload = min(part_size, total_size - offset)
                # SizedFileAdapter(fileobj, size) produces a new file object
                # limited to `size` bytes from the current position.
                result = self.bucket.upload_part(
                    key, upload_id, part_number,
                    SizedFileAdapter(fileobj, num_to_upload))
                parts.append(PartInfo(part_number, result.etag))
                offset += num_to_upload
                part_number += 1

        # Complete the multipart upload.
        self.bucket.complete_multipart_upload(key, upload_id, parts)
    except BaseException:
        # The upload_id is only known inside this call, so a failed upload
        # can never be resumed; abort it instead of leaving it dangling.
        try:
            self.bucket.abort_multipart_upload(key, upload_id)
        except Exception as abort_error:
            # Never let a failed cleanup mask the original error.
            LogHelper.error('abort multipart upload failed: ' + str(abort_error))
        raise

    # Verify the upload by comparing the stored object with the local file.
    # NOTE(review): both are read fully into memory; costly for large files.
    with open(filename, 'rb') as fileobj:
        if self.bucket.get_object(key).read() != fileobj.read():
            msg = '上传' + filename + '出错,验证分片失败'
            print(msg)
            LogHelper.info(msg)
def send_data_to_server(self, data):
    """POST *data* to ``config.API_SERVER_URL``, retrying once on a non-200 reply.

    A 200 response from either attempt is written to the debug log and to
    stdout. If both attempts return a non-200 status, the failure, the
    payload and the last response are logged as errors. Exceptions raised
    by ``requests.post`` are not caught here and propagate to the caller.

    Args:
        data: Payload to send; must be serialisable by ``json.dumps``.
    """
    headers = {
        "Content-Type": "application/json; charset=UTF-8",
    }
    # NOTE(review): the payload is serialised with json.dumps and then passed
    # via `json=`, which serialises it again, so the request body is a JSON
    # string that itself contains JSON. Kept unchanged because the server may
    # depend on this format -- confirm before simplifying to `json=data`.
    payload = json.dumps(data)

    max_attempts = 2
    response = None
    for _ in range(max_attempts):
        response = requests.post(config.API_SERVER_URL, json=payload, headers=headers)
        if response.status_code == 200:
            # Report success no matter which attempt succeeded.
            text = response.text
            LogHelper.debug(text)
            print('同步完成,服务器返回信息:', end='')
            print(text)
            return

    # Every attempt returned a non-200 status.
    print('同步数据到代理池失败')
    LogHelper.error('同步数据到代理池失败')
    LogHelper.error(data)
    LogHelper.error(response)
def run(self):
    """Validate the stored proxies in a thread pool, then sync the valid ones.

    Rows are fetched with ``SqlHelper.get`` filtered by ``self.data_flag``,
    and ``self.check_proxy_and_save`` is run for each row's ``ip`` and
    ``port``. When validation finishes, the contents of ``self.proxies``
    are sent with ``self.send_data_to_server``; one retry is made if the
    call raises. If the retry raises as well, the error and the payload are
    logged and the method returns normally.
    """
    print('开始验证代理')
    data = SqlHelper.get(None, {'flag': self.data_flag})
    print('共{0}条数据待验证'.format(len(data)))

    thread_num = 50
    with futures.ThreadPoolExecutor(max_workers=thread_num) as executor:
        param_left = len(data)
        param_iter = iter(data)
        jobs = {}
        while param_left:
            # Top up the pending set; the shared iterator resumes where the
            # previous pass stopped, so every row is submitted exactly once
            # and the number of pending futures stays bounded.
            for param in param_iter:
                job = executor.submit(self.check_proxy_and_save, param.ip, param.port)
                jobs[job] = param
                if len(jobs) > thread_num:
                    break
            # Retire one finished job, then go back to submitting more.
            # job.result() is not called, so an exception raised inside a
            # worker is not re-raised here.
            for job in futures.as_completed(jobs):
                param_left -= 1
                del jobs[job]
                break
    print('验证完成')

    count = len(self.proxies)
    if count <= 0:
        print('无有效代理,本次不同步到代理池')
        return
    print('共{0}条有效代理,开始同步到代理池'.format(count))
    data = list(self.proxies)
    LogHelper.debug('开始同步:')

    # Try twice. Only Exception is caught so that KeyboardInterrupt and
    # SystemExit stop the run instead of triggering a retry.
    max_attempts = 2
    last_error = None
    for _ in range(max_attempts):
        try:
            self.send_data_to_server(data)
        except Exception as e:
            last_error = e
        else:
            return
    LogHelper.error('同步数据到代理池出错:' + str(last_error))
    LogHelper.error(data)
def log(msg):
    """Print *msg* to stdout and pass it to ``LogHelper.info``."""
    print(msg)
    LogHelper.info(msg)