async def x(index):
    """Process the items yielded by ``db.choose`` and retry while none show up.

    Every pass iterates ``db.choose(M, {FIELD_NAME: from_}, {FIELD_NAME: to_},
    limit_=limit)``.  Each yielded item is logged, counted in
    ``processed[(from_, to_)]`` and followed by a ``sleep_time`` pause.

    * A pass that yielded at least one item ends the worker after logging
      "STOP".
    * An empty pass uses up one retry; when the ``retries_`` budget is already
      spent the worker returns without logging "STOP".

    :param index: worker number; only used to build the logger name.
    """
    log = Log(f"{from_}=>{to_}#{index}")
    attempts_left = retries_

    while True:
        got_items = False
        selection = db.choose(M, {FIELD_NAME: from_}, {FIELD_NAME: to_}, limit_=limit)
        async for item in selection:
            log.info(item=item)
            processed[(from_, to_)] += 1
            await asyncio.sleep(sleep_time)
            got_items = True

        if got_items:
            # Something was processed during this pass: the worker is done.
            log.important("STOP")
            return

        if attempts_left == 0:
            # Retry budget exhausted and still nothing to process.
            return

        attempts_left -= 1
        log.warning(retry=retries_ - attempts_left)
        # Back off longer than the per-item pause before asking again.
        await asyncio.sleep(sleep_time * sleep_coef)
class _Tasks:
    """Helper that prunes completed asyncio tasks out of a task mapping."""

    def __init__(self):
        self.log = Log(self.__class__.__name__)

    async def remove_tasks(self, tasks: Dict[asyncio.Task, Any]):
        """Drop every finished task from *tasks* and report how it ended.

        The mapping is modified in place and also returned.

        * cancelled task -> warning in the log;
        * task with a truthy result -> debug entry carrying the result;
        * task that raised ``RuntimeError`` -> the error is re-raised and the
          task is left in the mapping;
        * task that raised any other ``Exception`` -> logged with traceback.

        :param tasks: dict whose keys are the tasks to inspect.
        :return: the same dict, without the finished tasks.
        """
        assert isinstance(tasks, dict)

        # Snapshot first: the dict is mutated while we walk the finished ones.
        completed = [candidate for candidate in tasks if candidate.done()]

        for current in completed:
            if not current.cancelled():
                try:
                    outcome = await current
                    if outcome:
                        self.log.debug("Task say:", task=current, result=outcome)
                except RuntimeError:
                    raise
                except Exception:
                    self.log.exception("Task shout:", task=current)
            else:
                self.log.warning("Task was cancelled: ", task=current)
            del tasks[current]

        return tasks
def add_warning(self, text, *args, **kwargs):
    """Send *text* to ``Log.warning``, prefixed with this object's identity.

    The emitted message has the form ``"<name_prefix> <name>: <text>"``.
    Extra positional and keyword arguments are forwarded to ``Log.warning``
    untouched.
    """
    message = '{!s} {!s}: {!s}'.format(self.name_prefix, self.name, text)
    Log.warning(message, *args, **kwargs)
def get_load_avg(self):
    """Check the system load and fire an alert when it reaches the limit.

    Reads the first value reported by ``os.getloadavg()`` (the 1-minute
    average).  When it is at or above ``self.max_avg`` a warning is logged and
    the object built by ``load_alert(load, max_avg, timestamp)`` is passed to
    ``self.fire``.  Nothing is returned.
    """
    load = os.getloadavg()[0]
    limit_reached = load >= self.max_avg
    if not limit_reached:
        return

    Log.warning('Average at %f (max: %d)' % (load, self.max_avg))
    self.fire(
        load_alert(load, self.max_avg, time.time())
    )
class VK:
    """Throttled asynchronous client for the VK HTTP API.

    Call ``await warm_up()`` before the first request and ``await shutdown()``
    when done.  All requests go through :meth:`call_method`, which spaces
    them out by ``self.threshold`` seconds and adapts that delay to the
    API's "too many requests" answers.
    """

    def __init__(self, config_vk):
        self.log = Log("VK")
        self.config = config_vk
        # Merged into every request; on a key clash these override the
        # per-call parameters (see call_method).
        self.additional_params = {
            'access_token': self.config.token,
            'lang': 'ru',
            'v': "5.103"
        }
        # Comma-separated field list sent with ``users.get``.
        self.person_fields = ",".join([
            "first_name",
            "last_name",
            "deactivated",
            "verified",
            "sex",
            "bdate",
            "city",  # TODO: https://vk.com/dev/places.getCityById
            "country",  # TODO: https://vk.com/dev/places.getCountryById
            "home_town",
            "photo_400_orig",
            "online",
            "has_mobile",
            "contacts",
            "education",
            "universities",
            "schools",
            "last_seen",
            "occupation",
            "hidden",
        ])
        # Comma-separated field list sent with ``groups.getById``.
        self.group_info_fields = ",".join([
            'id', 'name', 'type', 'photo_200', 'city', 'description', 'place'
        ])
        # No extra fields are requested for ``wall.get`` (empty string).
        self.post_fields = ",".join([])
        # Created lazily by warm_up(); call_method asserts it is set.
        self.session: aiohttp.ClientSession = None
        # Timestamp of the most recent request, used for throttling.
        self.last_call = 0
        # Minimum pause (seconds) between requests; tuned by call_method.
        self.threshold = 1 / 3
        self._update_token = None
        self.query_lock = asyncio.Lock()
        self.stats = VKStats('vk', "VK API")
        self.auth_lock = asyncio.Lock()

    async def warm_up(self):
        """Create the HTTP session used by all API calls."""
        self.session = aiohttp.ClientSession()

    async def call_method(self, method, **params):
        """Call VK API *method* with *params* and return its ``response``.

        Requests are throttled: if the previous request started less than
        ``self.threshold`` seconds ago, the call sleeps for ``threshold``
        seconds first.  Each success shrinks the threshold slightly; each
        "too many requests" error grows it by 10 % (capped at 1 second) and
        the request is retried.

        :param method: API method name, appended to ``config['api_host']``.
        :param params: query parameters for the method.
        :return: the ``response`` part of the answer, or ``None`` when the API
            reported an invalid session, a private profile or a
            deleted/banned profile (a warning is logged in those cases).
        :raises VKError: for any other API error.
        """
        self.log.debug(method=method, params=params)
        self.stats.call_methods_count += 1
        try:
            while True:
                assert self.session is not None, "call `await .warm_up()` first"
                async with self.query_lock:
                    if time() - self.threshold < self.last_call:
                        self.log.deep("Sleep", threshold=self.threshold)
                        await asyncio.sleep(self.threshold)
                    self.last_call = time()
                    self.stats.queries += 1
                    response = await self.session.get(
                        url=f"{self.config['api_host']}{method}",
                        params={
                            **params,
                            **self.additional_params
                        },
                        timeout=10)
                self.stats.by_type[method] += 1
                result = await response.json()
                if 'error' in result:
                    self.stats.errors += 1
                    vk_error = VKError(result['error'])
                    if vk_error.error_code == VKError.TOO_MANY_REQUESTS:
                        # Slow down (at most one request per second) and retry.
                        self.stats.errors_too_many += 1
                        self.threshold *= 1.1
                        if self.threshold > 1:
                            self.threshold = 1
                        self.log.warning("Too many requests",
                                         threshold=self.threshold)
                        continue
                    # The next three errors are expected: log them and leave
                    # the loop, which makes the method return None.
                    if vk_error.error_code == VKError.INVALID_SESSION:
                        self.log.warning(
                            "Need auth. Please run <app> <config> auth")
                        break
                    if vk_error.error_code == VKError.PROFILE_PRIVATE:
                        self.log.warning("Profile private",
                                         method=method,
                                         params=params)
                        break
                    if vk_error.error_code == VKError.DELETED_OR_BANNED:
                        self.log.warning("Profile deleted or banned",
                                         method=method,
                                         params=params)
                        break
                    if vk_error.error_code == VKError.RATE_LIMIT_REACHED:
                        self.log.important("RATE LIMIT")
                    raise vk_error
                else:
                    self.stats.success += 1
                    assert 'response' in result
                    # Speed up a little after every successful call.
                    self.threshold *= 0.991
                    return result['response']
        finally:
            self.stats.call_methods_count -= 1
            self.stats.threshold = self.threshold

    async def persons_info(self, *user_ids) -> Sequence[VKPerson]:
        """Fetch profiles for *user_ids* via ``users.get``.

        :return: list of ``VKPerson``; empty when ``call_method`` returned
            ``None`` (an API error that it already logged).
        """
        answer = await self.call_method("users.get",
                                        user_ids=",".join(map(str, user_ids)),
                                        fields=self.person_fields)
        if answer is None:
            # call_method signals a logged, non-fatal API error with None;
            # iterating it would raise TypeError.
            return []
        return [VKPerson(**user_info) for user_info in answer]

    async def me(self) -> VKPerson:
        """Return the profile of ``config.user_id``.

        :raises IndexError: if the lookup produced no profile.
        """
        return (await self.persons_info(self.config.user_id))[0]

    async def group_info(self, group_id) -> VKGroup:
        """Fetch one group via ``groups.getById``.

        :return: the ``VKGroup``, or ``None`` when ``call_method`` returned
            ``None`` (an API error that it already logged).
        """
        answer = await self.call_method("groups.getById",
                                        group_id=group_id,
                                        fields=self.group_info_fields)
        if answer is None:
            # Same convention as _offsetter: None means "logged API error".
            return None
        assert len(answer) == 1
        group = answer[0]
        return VKGroup(**group)

    async def person_posts(self, person_id, count):
        """Return up to *count* wall posts of a person as a list."""
        return [post async for post in self._posts_count(person_id, count)]

    async def person_posts_iter(self, person_id, count=None):
        """Yield wall posts of a person (all of them when *count* is None)."""
        async for post in self._posts_count(person_id, count):
            yield post

    async def group_posts_iter(self, group_id, count=None):
        """Yield wall posts of a group; the wall owner id is ``-group_id``."""
        async for post in self._posts_count(-group_id, count):
            yield post

    async def comments_iter(self, owner_id, post_id, count=None):
        """Yield ``VKComment`` objects of a post via ``wall.getComments``.

        ``post_id`` and ``owner_id`` are set on every yielded comment.
        """
        async for raw_data in self._offsetter(
                count,
                dict(
                    method='wall.getComments',
                    owner_id=owner_id,
                    post_id=post_id,
                    need_likes=1,
                    preview_length=0,
                    extended=0,
                    thread_items_count=10,
                )):
            comment = VKComment(**raw_data)
            comment.post_id = post_id
            comment.owner_id = owner_id
            yield comment

    async def group_posts(self, group_id, count=None, from_ts=None):
        """Return wall posts of a group as a list.

        :raises ValueError: if both *count* and *from_ts* are given.

        NOTE(review): apart from that check, *from_ts* is not used here.
        """
        if count is not None and from_ts is not None:
            raise ValueError("Use one of attribute: `count` or `from_ts`")
        return [post async for post in self._posts_count(-group_id, count)]

    async def _posts_count(self, owner_id, count):
        """Yield ``VKPost`` objects from ``wall.get`` for *owner_id*."""
        async for post in self._offsetter(
                count,
                dict(method="wall.get",
                     owner_id=owner_id,
                     fields=self.post_fields)):
            yield VKPost(**post)

    async def _offsetter(self, count, params):
        """Yield items of a paged API method, 100 per request at most.

        :param count: maximum number of items, or ``None`` for "everything".
        :param params: keyword arguments for ``call_method`` (including
            ``method``); ``count`` and ``offset`` are added per page.
        :raises ValueError: if *count* is below 1.
        :raises VKError: re-raised from ``call_method`` after logging.

        Iteration stops early when ``call_method`` returns ``None``.
        """
        # TODO: Can be optimized! Use asyncio.gather after first query, Luke!
        if count is None:
            count = float("+inf")
        if count < 1:
            raise ValueError(f"{count=} must be more than 0")
        offset = 0
        # Upper bound of what to fetch; refined after each answer using the
        # total reported by the API.
        items_count = count
        while offset < items_count:
            to_download = min(items_count - offset, 100)
            try:
                answer = await self.call_method(**params,
                                                count=to_download,
                                                offset=offset)
            except VKError:
                self.log.exception(params=params)
                raise
            if answer is None:
                # Good error in call_method
                return
            items_count = min(count, answer['count'])
            if to_download != len(answer['items']):
                if to_download < items_count:
                    self.log.warning("Downloaded items count:",
                                     wanted=to_download,
                                     actual=len(answer['items']))
            offset += to_download
            for item in answer['items']:
                yield item

    async def group_user_ids(self, group_id, count=None) -> Sequence[int]:
        """Return what ``groups.getMembers`` yields for a group, as a list."""
        users = []
        async for user_id in self.group_participants_iter(group_id, count):
            users.append(user_id)
        return users

    async def group_participants_iter(self, group_id, count=None):
        """Yield the items returned by ``groups.getMembers`` for a group."""
        async for user_id in self._offsetter(
                count, dict(method="groups.getMembers", group_id=group_id)):
            yield user_id

    async def shutdown(self):
        """Close the HTTP session if one was created."""
        if self.session:
            await self.session.close()
class BaseWork:
    """Base class of a work: read items, process them in parallel, store results.

    Subclasses override :meth:`input` (async generator of items),
    :meth:`process` (async generator of results for one item) and
    :meth:`update` (consume one result); :meth:`warm_up` and :meth:`shutdown`
    are optional hooks.  Awaiting the instance (``await work()``) runs the
    whole life cycle.
    """

    # Evaluated once, when the class body is executed.
    start_time = time()
    # When true, an exception from the main cycle is logged and swallowed.
    MUTE_EXCEPTION = True
    # Passed to TasksManager as its only argument.
    PARALLEL = 10
    # 0 means "read input once"; otherwise the number of empty reads tolerated.
    INPUT_RETRIES = 0
    # Multiplier for the pause between empty reads (pause = retries * coef).
    WAIT_COEF = 1
    # Set to True on the instance by _input_cycle to end its loop.
    need_stop = False
    # Per-class-name instance counters; this dict lives on the class, so it
    # is shared by every instance that does not override it.
    work_ids: Dict[str, int] = {}

    def __init__(self):
        work_name = self.__class__.__name__
        self.log = Log(work_name)
        self.log.debug("Register work")
        self.work_ids.setdefault(work_name, -1)
        self.work_ids[work_name] += 1
        work_id = f"{work_name}_{self.work_ids[work_name]}"
        self.log.debug("Work registered", work_id=work_id)
        self.stat = Stats(work_id, work_name)
        self.log.debug("Run task manager")
        self.task_manager = TasksManager(self.PARALLEL)
        # Tasks currently inside _run_task.
        self.tasks: List[TaskInfo] = []
        self.state = "Base class initialized"

    @property
    def state(self):
        """Current human-readable state, stored in ``self.stat.state``."""
        return self.stat.state

    @state.setter
    def state(self, value):
        self.stat.state = value
        self.log.debug(self.state)

    async def warm_up(self):
        """Hook awaited before the main cycle starts; does nothing here."""
        pass

    async def input(self):
        """Async generator of items to process; must be overridden."""
        raise NotImplementedError()
        # Unreachable on purpose: the ``yield`` keeps this an async generator
        # so the error surfaces on iteration, without a spurious first item.
        yield

    async def process(self, item):
        """Async generator of results for *item*; must be overridden."""
        raise NotImplementedError()
        # Unreachable on purpose, see input().
        yield

    async def update(self, result):
        """Consume one processing result; must be overridden."""
        raise NotImplementedError()

    async def shutdown(self):
        """Hook awaited after the main cycle ends; does nothing here."""
        pass

    async def __call__(self):
        """Run the work: warm up, main cycle, shutdown.

        An exception from the main cycle is always logged; it is re-raised
        only when ``MUTE_EXCEPTION`` is false.  The finish time is recorded
        and :meth:`shutdown` is awaited in either case.
        """
        self.state = "🔥 Warming up"
        await self.warm_up()
        self.stat._start_time = time()
        try:
            await self.main_cycle()
        except Exception:
            self.log.exception("MAIN CYCLE")
            if not self.MUTE_EXCEPTION:
                raise
        finally:
            # Runs even when the exception propagates, so resources held by
            # the work are released before the caller sees the error.
            self.stat.finished_time = time()
            self.state = "🛑 Shutdown"
            await self.shutdown()
            self.state = "🏁 Finished"

    async def main_cycle(self):
        """Run the input and result cycles concurrently until both finish."""
        self.state = "⌛️ Ready to start"
        await asyncio.gather(self._input_cycle(), self._result_cycle())

    async def _result_cycle(self):
        """Feed results from the task manager to :meth:`update`.

        Stops when the task manager hands out a ``TasksManager.Finish``.
        """
        while True:
            try:
                result = await asyncio.wait_for(self.task_manager.take(), 1)
            except asyncio.TimeoutError:
                # Nothing ready within a second: ask again.
                continue
            if isinstance(result, TasksManager.Finish):
                break
            await self.update(result)
            self.stat.updated_items += 1

    async def _input_cycle(self):
        """Submit a task per input item, re-reading input per INPUT_RETRIES.

        ``self.stat.retries`` counts consecutive empty reads; it is set to
        ``None`` while a read produces items.  The task manager is stopped
        when the loop ends.
        """
        self.stat.retries = 0
        while not self.need_stop:
            self.state = "🔎 Wait for new item"
            async for item in self.input():
                self.stat.input_items += 1
                await self.task_manager.put(self._run_task(TaskInfo(item)))
                self.stat.retries = None

            if self.INPUT_RETRIES == 0:
                # Need to run only one time
                self.need_stop = True
                continue

            if self.stat.retries is None:
                # Item found
                self.stat.retries = 0
                await asyncio.sleep(0)
                continue

            if self.stat.retries >= self.INPUT_RETRIES:
                self.log.warning("Too many retries, i'm done",
                                 retries=self.stat.retries)
                self.need_stop = True
                continue

            # Retry logic
            self.stat.retries += 1
            self.state = f"🔎 Wait items, repeat №{self.stat.retries}"
            await asyncio.sleep(self.stat.retries * self.WAIT_COEF)

        await self.task_manager.stop()

    async def _run_task(self, info: TaskInfo):
        """Async generator that processes one item and yields its results.

        Keeps *info* in ``self.tasks`` while running, updates its status text
        and the counters in ``self.stat``, and awaits
        ``info.processed_callback`` (when set) after processing.
        """
        self.tasks.append(info)
        info.update("🎬 Task started")
        info.update("🛠 Processing")
        async for result in self.process(info.item):
            self.stat.returned_items += 1
            info.update(f"🛠 {repr(result)}")
            yield result
            info.update("🛠 Processing")
        self.stat.processed_items += 1
        info.update("✅ Finish processing")
        if info.processed_callback:
            info.update("🤙 Run callback")
            self.log.info("Run processed callback",
                          processed_callback=info.processed_callback)
            await info.processed_callback
        self.stat.finished_items += 1
        info.update("🏁 Task finished")
        self.tasks.remove(info)

    async def take_error(self):
        """Return the next error reported by the task manager."""
        return await self.task_manager.take_error()